예제 #1
0
def diagnose_me(ctx, json, project_id, namespace):
    """Runs environment diagnostic with specified parameters.

    Feature stage:
    [Alpha](https://github.com/kubeflow/pipelines/blob/07328e5094ac2981d3059314cc848fbb71437a76/docs/release/feature-stages.md#alpha)
    """
    # NOTE(review): the docstring above is left untouched on purpose -- the
    # `ctx` parameter suggests this is registered as a click command, in which
    # case the docstring doubles as the --help text. Confirm before editing.
    #
    # `ctx` and `namespace` are accepted but not read anywhere in this body.

    # Step 1: refuse to continue unless the gcloud version report mentions
    # every tool the diagnostics rely on.
    required_tools = ('Google Cloud SDK', 'gsutil', 'kubectl')
    sdk_report = gcp.get_gcp_configuration(
        gcp.Commands.GET_GCLOUD_VERSION,
        human_readable=False,
        project_id=project_id)
    reported = sdk_report.json_output
    missing_tool = next(
        (tool for tool in required_tools if tool not in reported), None)
    if missing_tool is not None:
        raise RuntimeError(
            '%s is not installed, gcloud, gsutil and kubectl are required '
            % missing_tool +
            'for this app to run. Please follow instructions at ' +
            'https://cloud.google.com/sdk/install to install the SDK.')

    click.echo('Collecting diagnostic information ...', file=sys.stderr)

    # Step 2: by default every known command is executed, in the order
    # gcp -> kubectl -> development environment, keyed by its command enum.
    human_readable = not json
    results = {}
    results.update(
        (command,
         gcp.get_gcp_configuration(
             command, project_id=project_id, human_readable=human_readable))
        for command in gcp.Commands)
    results.update(
        (command,
         k8.get_kubectl_configuration(command, human_readable=human_readable))
        for command in k8.Commands)
    results.update(
        (command,
         dev_env.get_dev_env_configuration(
             command, human_readable=human_readable))
        for command in dev_env.Commands)

    # Step 3: hand everything to the shared printer.
    print_to_sdtout(results, human_readable)
예제 #2
0
def diagnose_me(ctx: click.Context, json: bool, project_id: str,
                namespace: str):
    """Runs KFP environment diagnostic."""
    # NOTE(review): the one-line docstring is kept as is -- if this function
    # is exposed as a click command it is also the --help text. Confirm
    # before expanding it.
    #
    # `ctx` and `namespace` are part of the signature but unused in the body.

    # The gcloud version report must mention each of these tools; the first
    # one that is absent aborts the run.
    version_report = gcp.get_gcp_configuration(
        gcp.Commands.GET_GCLOUD_VERSION,
        human_readable=False,
        project_id=project_id)
    for app in ('Google Cloud SDK', 'gsutil', 'kubectl'):
        if app in version_report.json_output:
            continue
        raise RuntimeError(
            f'{app} is not installed, gcloud, gsutil and kubectl are required '
            + 'for this app to run. Please follow instructions at ' +
            'https://cloud.google.com/sdk/install to install the SDK.')

    click.echo('Collecting diagnostic information ...', file=sys.stderr)

    # Default behaviour: run every command of every provider. The three
    # groups are gathered separately and merged in the order
    # gcp -> kubectl -> development environment.
    readable = not json

    gcp_results = {}
    for command in gcp.Commands:
        gcp_results[command] = gcp.get_gcp_configuration(
            command, project_id=project_id, human_readable=readable)

    k8_results = {
        command: k8.get_kubectl_configuration(command, human_readable=readable)
        for command in k8.Commands
    }

    dev_env_results = {
        command: dev_env.get_dev_env_configuration(command,
                                                   human_readable=readable)
        for command in dev_env.Commands
    }

    results: ResultsType = {**gcp_results, **k8_results, **dev_env_results}

    print_to_sdtout(results, readable)
예제 #3
0
def run_diagnose_me(
    bucket: str,
    execution_mode: str,
    project_id: str,
    target_apis: str,
    quota_check: list = None,
) -> NamedTuple('Outputs', [('bucket', str), ('project_id', str)]):
    """Performs environment verification specific to this pipeline.

    Checks, in order: regional quota, the project gcloud is configured for,
    visibility of the provided GCS bucket, and the state of the requested
    APIs. Every failed check is reported and remembered; whether a failure
    aborts the run depends on ``execution_mode``.

    Args:
        bucket: Name of the bucket to be checked. Must be of the format
            gs://bucket_root/any/path/here/is/ignored where any path beyond
            the root is ignored.
        execution_mode: If it contains HALT_ON_ERROR, any detected error
            causes an exception to be raised. This is intended to stop the
            data processing of a pipeline. Any other value only reports
            errors/warnings.
        project_id: GCP project ID which is assumed to be the project under
            which the current pod is executing.
        target_apis: Comma separated list of APIs to be verified. Spaces are
            ignored.
        quota_check: Optional list of entries describing how much quota is
            required. Each entry has three fields: region, metric and
            quota_needed. quota_needed is converted with float() before it
            is compared, so numeric strings and numbers are both accepted.
            None (the default) skips the per-entry checks.

    Returns:
        The tuple (project_id, bucket).

    Raises:
        RuntimeError: If the configuration is not set up properly and the
            HALT_ON_ERROR flag is set.
    """

    # NOTE(review): imports are kept inside the function body, presumably
    # because this function is shipped as a self-contained pipeline
    # component -- do not hoist them or extract module-level helpers without
    # confirming how the function is packaged.

    # Installing pip3 and kfp, since the base image 'google/cloud-sdk:279.0.0'
    # does not come with pip3 pre-installed.
    import subprocess
    subprocess.run(
        ['curl', 'https://bootstrap.pypa.io/get-pip.py', '-o', 'get-pip.py'],
        capture_output=True)
    subprocess.run(['apt-get', 'install', 'python3-distutils', '--yes'],
                   capture_output=True)
    subprocess.run(['python3', 'get-pip.py'], capture_output=True)
    subprocess.run(
        ['python3', '-m', 'pip', 'install', 'kfp>=0.1.31', '--quiet'],
        capture_output=True)

    import sys
    from kfp.cli.diagnose_me import gcp

    config_error_observed = False

    quota_list = gcp.get_gcp_configuration(gcp.Commands.GET_QUOTAS,
                                           human_readable=False)

    if quota_list.has_error:
        print('Failed to retrieve project quota with error %s\n' %
              (quota_list.stderr))
        config_error_observed = True
    else:
        # Mapping from region to dict[metric, available].
        quota_dict = {
            region_quota['name']: {
                quota['metric']: quota['limit'] - quota['usage']
                for quota in region_quota['quotas']
            } for region_quota in quota_list.json_output
        }

        # `quota_check` defaults to None; treat that as "nothing to check"
        # instead of failing while iterating.
        for single_check in quota_check or []:
            region = single_check['region']
            metric = single_check['metric']
            quota_needed = single_check['quota_needed']

            if region not in quota_dict:
                print(
                    'Regional quota for %s does not exist in current project.\n'
                    % (region))
                config_error_observed = True
                continue

            available = quota_dict[region].get(metric)
            if available is None:
                # A metric missing from the region is reported like any
                # other failed check rather than crashing with KeyError.
                print('Quota metric %s does not exist for region %s.\n' %
                      (metric, region))
                config_error_observed = True
            elif available < float(quota_needed):
                # float(): entries are documented as string-typed, and
                # comparing a number with a str raises TypeError.
                print(
                    'Insufficient quota observed for %s at %s: %s is needed but only %s is available.\n'
                    % (metric, region, str(quota_needed), str(available)))
                config_error_observed = True

    # Get the project ID
    # from project configuration
    project_config = gcp.get_gcp_configuration(gcp.Commands.GET_GCLOUD_DEFAULT,
                                               human_readable=False)
    if project_config.has_error:
        print('Project configuration is not accessible with error  %s\n' %
              (project_config.stderr),
              file=sys.stderr)
        config_error_observed = True
    else:
        auth_project_id = project_config.parsed_output['core']['project']
        print(
            'GCP credentials are configured with access to project: %s ...\n' %
            (project_id))
        print('Following account(s) are active under this pipeline:\n')
        subprocess.run(['gcloud', 'auth', 'list', '--format', 'json'])
        print('\n')

        # Only meaningful when the configured project could be read; when
        # the lookup failed there is no configured ID to compare against.
        if auth_project_id != project_id:
            print(
                'User provided project ID %s does not match the configuration %s\n'
                % (project_id, auth_project_id),
                file=sys.stderr)
            config_error_observed = True

    # Get project buckets
    get_project_bucket_results = gcp.get_gcp_configuration(
        gcp.Commands.GET_STORAGE_BUCKETS, human_readable=False)

    if get_project_bucket_results.has_error:
        print('could not retrieve project buckets with error: %s' %
              (get_project_bucket_results.stderr),
              file=sys.stderr)
        config_error_observed = True

    # Get the root of the user provided bucket i.e. gs://root.
    bucket_root = '/'.join(bucket.split('/')[0:3])

    print(
        'Checking to see if the provided GCS bucket\n  %s\nis accessible ...\n'
        % (bucket))

    if bucket_root in get_project_bucket_results.json_output:
        print(
            'Provided bucket \n   %s\nis accessible within the project\n   %s\n'
            % (bucket, project_id))

    else:
        # One-element tuple keeps %-formatting correct whatever type the
        # parsed output has; the newline separates the two sentences that
        # previously ran together.
        print(
            'Could not find the bucket %s in project %s\n' %
            (bucket, project_id) +
            'Please verify that you have provided the correct GCS bucket name.\n'
            + 'Only the following buckets are visible in this project:\n%s' %
            (get_project_bucket_results.parsed_output,),
            file=sys.stderr)
        config_error_observed = True

    # Verify APIs that are required are enabled
    api_config_results = gcp.get_gcp_configuration(gcp.Commands.GET_APIS)

    api_status = {}

    if api_config_results.has_error:
        print('could not retrieve API status with error: %s' %
              (api_config_results.stderr),
              file=sys.stderr)
        config_error_observed = True

    print('Checking APIs status ...')
    for item in api_config_results.parsed_output:
        api_status[item['config']['name']] = item['state']
        # printing the results in stdout for logging purposes
        print('%s %s' % (item['config']['name'], item['state']))

    # Check if target apis are enabled; an API that was not listed at all is
    # treated the same as a disabled one.
    for api in target_apis.replace(' ', '').split(','):
        if 'ENABLED' != api_status.get(api, 'DISABLED'):
            print(
                'API \"%s\" is not accessible or not enabled. To enable this api go to '
                % (api) +
                'https://console.cloud.google.com/apis/library/%s?project=%s' %
                (api, project_id),
                file=sys.stderr)
            config_error_observed = True

    if 'HALT_ON_ERROR' in execution_mode and config_error_observed:
        raise RuntimeError(
            'There was an error in your environment configuration.\n'
            'Note that resolving such issues generally require a deep knowledge of Kubernetes.\n'
            '\n'
            'We highly recommend that you recreate the cluster and check "Allow access ..." \n'
            'checkbox during cluster creation to have the cluster configured automatically.\n'
            'For more information on this and other troubleshooting instructions refer to\n'
            'our troubleshooting guide.\n'
            '\n'
            'If you have intentionally modified the cluster configuration, you may\n'
            'bypass this error by removing the execution_mode HALT_ON_ERROR flag.\n'
        )

    # NOTE(review): the declared Outputs order is (bucket, project_id) but
    # the values are returned as (project_id, bucket). Left unchanged because
    # callers are not visible from here -- confirm which order consumers
    # expect before swapping.
    return (project_id, bucket)
예제 #4
0
 def test_project_configuration_gsutil(self, mock_execute_gsutil_command):
     """Checks that GET_STORAGE_BUCKETS results in exactly one `ls` call."""
     command = gcp.Commands.GET_STORAGE_BUCKETS
     gcp.get_gcp_configuration(command)

     # No project was passed above, so the call is expected to carry
     # project_id=None.
     expected_arguments = ['ls']
     mock_execute_gsutil_command.assert_called_once_with(
         expected_arguments, project_id=None)
예제 #5
0
 def test_project_configuration_gcloud(self, mock_execute_gcloud_command):
     """Checks that GET_APIS results in exactly one `services list` call."""
     command = gcp.Commands.GET_APIS
     gcp.get_gcp_configuration(command)

     # Neither project_id nor human_readable was passed above; the call is
     # expected to carry project_id=None and human_readable=False.
     expected_arguments = ['services', 'list']
     expected_options = {'project_id': None, 'human_readable': False}
     mock_execute_gcloud_command.assert_called_once_with(
         expected_arguments, **expected_options)