Example #1
0
    def provide_gcp_credential_file_as_context(self):
        """
        Context manager that provides a Google Cloud credentials for application supporting `Application
        Default Credentials (ADC) strategy <https://cloud.google.com/docs/authentication/production>`__.

        It can be used to provide credentials for external programs (e.g. gcloud) that expect authorization
        file in ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable.

        :yields: path of the credentials file exposed via the environment variable, or ``None``
            when neither field is configured and the default service account credentials are used.
        :raises AirflowException: if both ``key_path`` and ``keyfile_dict`` are set, or the key
            file is a legacy P12 file.
        """
        # The two credential sources come from the connection extras and are mutually exclusive.
        key_path = self._get_field('key_path',
                                   None)  # type: Optional[str]
        keyfile_dict = self._get_field('keyfile_dict',
                                       None)  # type: Optional[Dict]
        if key_path and keyfile_dict:
            raise AirflowException(
                "The `keyfile_dict` and `key_path` fields are mutually exclusive. "
                "Please provide only one value.")
        elif key_path:
            if key_path.endswith('.p12'):
                raise AirflowException(
                    'Legacy P12 key file are not supported, use a JSON key file.'
                )
            with patch_environ({CREDENTIALS: key_path}):
                yield key_path
        elif keyfile_dict:
            # Materialize the credentials in a temp file so external tools can read them;
            # the file is removed when the context exits.
            # NOTE(review): `keyfile_dict` is written verbatim, which only works if
            # `_get_field` returns a JSON string here despite the `Dict` type comment —
            # confirm; otherwise this would need `json.dumps(keyfile_dict)`.
            with tempfile.NamedTemporaryFile(mode='w+t') as conf_file:
                conf_file.write(keyfile_dict)
                conf_file.flush()
                with patch_environ({CREDENTIALS: conf_file.name}):
                    yield conf_file.name
        else:
            # We will use the default service account credentials.
            yield None
def provide_gcp_connection(
    key_file_path: Optional[str] = None,
    scopes: Optional[Sequence] = None,
    project_id: Optional[str] = None,
):
    """
    Context manager that temporarily sets :envvar:`AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT`
    to a connection built from the given service-account JSON path, OAuth scopes and
    project id.

    :param key_file_path: Path to file with GCP credentials .json file.
    :type key_file_path: str
    :param scopes: OAuth scopes for the connection
    :type scopes: Sequence
    :param project_id: The id of GCP project for the connection.
    :type project_id: str
    :raises AirflowException: if a legacy P12 key file is provided.
    """
    # Legacy P12 keys cannot be expressed as a JSON connection — reject early.
    if key_file_path and key_file_path.endswith(".p12"):
        raise AirflowException(
            "Legacy P12 key file are not supported, use a JSON key file.")

    conn_uri = build_gcp_conn(
        key_file_path=key_file_path,
        scopes=scopes,
        project_id=project_id,
    )

    with patch_environ({AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT: conn_uri}):
        yield
Example #3
0
def provide_gcp_conn_and_credentials(
    key_file_path: Optional[str] = None,
    scopes: Optional[Sequence] = None,
    project_id: Optional[str] = None,
) -> Generator[None, None, None]:
    """
    Context manager that provides both:

    - Google Cloud credentials for application supporting `Application Default Credentials (ADC)
      strategy`__.
    - temporary value of :envvar:`AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT` connection

    :param key_file_path: Path to file with Google Cloud Service Account .json file.
    :param scopes: OAuth scopes for the connection
    :param project_id: The id of Google Cloud project for the connection.

    __ https://cloud.google.com/docs/authentication/production
    """
    with ExitStack() as stack:
        if key_file_path:
            # Fixed malformed mypy directive: `# type; ignore` had no effect.
            stack.enter_context(provide_gcp_credentials(key_file_path))  # type: ignore
        if project_id:
            # Patch both the current and the legacy project env vars so tools
            # reading either name see the override.
            stack.enter_context(  # type: ignore
                patch_environ({PROJECT: project_id, LEGACY_PROJECT: project_id})
            )

        stack.enter_context(provide_gcp_connection(key_file_path, scopes, project_id))  # type: ignore
        yield
Example #4
0
    def execute(self, context):
        """
        Fetch GKE cluster credentials with ``gcloud container clusters get-credentials``
        into a temporary kubeconfig, then delegate pod execution to the parent operator.

        :param context: Airflow task context forwarded to ``super().execute``.
        :raises AirflowException: when no project id can be determined.
        """
        # Prefer the explicitly passed project id; fall back to the one configured
        # on the GCP connection.
        hook = GoogleBaseHook(gcp_conn_id=self.gcp_conn_id)
        self.project_id = self.project_id or hook.project_id

        if not self.project_id:
            raise AirflowException(
                "The project id must be passed either as "
                "keyword project_id parameter or as project_id extra "
                "in GCP connection definition. Both are not set!")

        # Write config to a temp file and set the environment variable to point to it.
        # This is to avoid race conditions of reading/writing a single file
        with tempfile.NamedTemporaryFile() as conf_file,\
                patch_environ({KUBE_CONFIG_ENV_VAR: conf_file.name}), \
                hook.provide_authorized_gcloud():
            # Attempt to get/update credentials
            # We call gcloud directly instead of using google-cloud-python api
            # because there is no way to write kubernetes config to a file, which is
            # required by KubernetesPodOperator.
            # The gcloud command looks at the env variable `KUBECONFIG` for where to save
            # the kubernetes config file.
            cmd = [
                "gcloud", "container", "clusters", "get-credentials",
                self.cluster_name, "--zone", self.location, "--project",
                self.project_id
            ]
            if self.use_internal_ip:
                cmd.append('--internal-ip')
            execute_in_subprocess(cmd)

            # Tell `KubernetesPodOperator` where the config file is located
            self.config_file = os.environ[KUBE_CONFIG_ENV_VAR]
            return super().execute(context)
Example #5
0
    def provide_authorized_gcloud(self) -> Generator[None, None, None]:
        """
        Provides a separate gcloud configuration with current credentials.

        The gcloud tool allows you to login to Google Cloud only - ``gcloud auth login`` and
        for the needs of Application Default Credentials ``gcloud auth application-default login``.
        In our case, we want all commands to use only the credentials from ADC, so
        we need to configure the credentials in gcloud manually.
        """
        credentials_path = _cloud_sdk.get_application_default_credentials_path(
        )
        project_id = self.project_id

        with ExitStack() as exit_stack:
            # Expose the hook's key file (if any) via GOOGLE_APPLICATION_CREDENTIALS.
            exit_stack.enter_context(
                self.provide_gcp_credential_file_as_context())
            # Point the gcloud config dir at a throwaway directory so the user's
            # real gcloud state is never touched; it is cleaned up on exit.
            gcloud_config_tmp = exit_stack.enter_context(
                tempfile.TemporaryDirectory())
            exit_stack.enter_context(
                patch_environ({CLOUD_SDK_CONFIG_DIR: gcloud_config_tmp}))

            if CREDENTIALS in os.environ:
                # This solves most cases when we are logged in using the service key in Airflow.
                # Don't display stdout/stderr for security reason
                check_output([
                    "gcloud",
                    "auth",
                    "activate-service-account",
                    f"--key-file={os.environ[CREDENTIALS]}",
                ])
            elif os.path.exists(credentials_path):
                # If we are logged in by `gcloud auth application-default` then we need to log in manually.
                # This will make the `gcloud auth application-default` and `gcloud auth` credentials equals.
                with open(credentials_path) as creds_file:
                    creds_content = json.loads(creds_file.read())
                    # Don't display stdout/stderr for security reason
                    check_output([
                        "gcloud", "config", "set", "auth/client_id",
                        creds_content["client_id"]
                    ])
                    # Don't display stdout/stderr for security reason
                    check_output([
                        "gcloud", "config", "set", "auth/client_secret",
                        creds_content["client_secret"]
                    ])
                    # Don't display stdout/stderr for security reason
                    check_output([
                        "gcloud",
                        "auth",
                        "activate-refresh-token",
                        creds_content["client_id"],
                        creds_content["refresh_token"],
                    ])

            if project_id:
                # Don't display stdout/stderr for security reason
                check_output(
                    ["gcloud", "config", "set", "core/project", project_id])

            yield
def provide_salesforce_connection(key_file_path: str):
    """
    Context manager that provides a temporary value of SALESFORCE_DEFAULT connection.

    :param key_file_path: Path to file with SALESFORCE credentials .json file.
    :type key_file_path: str
    :raises AirflowException: if the file is not a JSON file or required fields are missing.
    """
    if not key_file_path.endswith(".json"):
        raise AirflowException("Use a JSON key file.")
    with open(key_file_path) as credentials:
        creds = json.load(credentials)
    # Validate all required credential fields before building the connection.
    missing_keys = CONFIG_REQUIRED_FIELDS - creds.keys()
    if missing_keys:
        # f-string instead of str.format for consistency with the rest of the file.
        raise AirflowException(f"{missing_keys} fields are missing")
    conn = Connection(
        conn_id=SALESFORCE_CONNECTION_ID,
        conn_type=CONNECTION_TYPE,
        host=creds["host"],
        login=creds["login"],
        password=creds["password"],
        extra=json.dumps({"security_token": creds["security_token"]}),
    )
    with patch_environ(
        {f"AIRFLOW_CONN_{conn.conn_id.upper()}": conn.get_uri()}):
        yield
Example #7
0
def provide_azure_data_lake_default_connection(key_file_path: str):
    """
    Context manager to provide a temporary value for azure_data_lake_default connection.

    :param key_file_path: Path to file with azure_data_lake_default credentials .json file.
    :type key_file_path: str
    """
    required_fields = {'login', 'password', 'extra'}

    if not key_file_path.endswith(".json"):
        raise AirflowException("Use a JSON key file.")
    with open(key_file_path) as credentials_file:
        creds = json.load(credentials_file)
    # Reject key files missing any of the mandatory fields.
    absent_fields = required_fields - creds.keys()
    if absent_fields:
        raise AirflowException(f"{absent_fields} fields are missing")
    connection = Connection(
        conn_id=DATA_LAKE_CONNECTION_ID,
        conn_type=DATA_LAKE_CONNECTION_TYPE,
        host=creds.get("host"),
        login=creds.get("login"),
        password=creds.get("password"),
        extra=json.dumps(creds.get('extra')),
    )
    env_key = f"AIRFLOW_CONN_{connection.conn_id.upper()}"
    with patch_environ({env_key: connection.get_uri()}):
        yield
def provide_gcp_conn_and_credentials(
    key_file_path: Optional[str] = None,
    scopes: Optional[Sequence] = None,
    project_id: Optional[str] = None,
):
    """
    Context manager that provides both:

    - GCP credentials for application supporting `Application Default Credentials (ADC)
    strategy <https://cloud.google.com/docs/authentication/production>`__.
    - temporary value of :envvar:`AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT` connection

    :param key_file_path: Path to file with GCP credentials .json file.
    :type key_file_path: str
    :param scopes: OAuth scopes for the connection
    :type scopes: Sequence
    :param project_id: The id of GCP project for the connection.
    :type project_id: str
    """
    with ExitStack() as stack:
        if key_file_path:
            # Fixed malformed mypy directive: `# type; ignore` had no effect.
            stack.enter_context(  # type: ignore  # pylint: disable=no-member
                provide_gcp_credentials(key_file_path))
        if project_id:
            # Patch both the current and the legacy project env vars so tools
            # reading either name see the override.
            stack.enter_context(  # type: ignore  # pylint: disable=no-member
                patch_environ({
                    PROJECT: project_id,
                    LEGACY_PROJECT: project_id
                }))

        stack.enter_context(  # type: ignore  # pylint: disable=no-member
            provide_gcp_connection(key_file_path, scopes, project_id))
        yield
def provide_gcp_credentials(key_file_path: Optional[str] = None,
                            key_file_dict: Optional[Dict] = None):
    """
    Context manager that provides a GCP credentials for application supporting `Application
    Default Credentials (ADC) strategy <https://cloud.google.com/docs/authentication/production>`__.

    It can be used to provide credentials for external programs (e.g. gcloud) that expect authorization
    file in ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable.

    :param key_file_path: Path to file with GCP credentials .json file.
    :type key_file_path: str
    :param key_file_dict: Dictionary with credentials.
    :type key_file_dict: Dict
    :raises ValueError: if neither ``key_file_path`` nor ``key_file_dict`` is provided.
    :raises AirflowException: if a legacy P12 key file is provided.
    """
    if not key_file_path and not key_file_dict:
        raise ValueError("Please provide `key_file_path` or `key_file_dict`.")

    if key_file_path and key_file_path.endswith(".p12"):
        raise AirflowException(
            "Legacy P12 key file are not supported, use a JSON key file.")

    with tempfile.NamedTemporaryFile(mode="w+t") as conf_file:
        if not key_file_path and key_file_dict:
            # Materialize the dict as a JSON file so ADC consumers can read it.
            conf_file.write(json.dumps(key_file_dict))
            conf_file.flush()
            key_file_path = conf_file.name
        # The guard above rejects the all-empty case, so `key_file_path` is always
        # set here (either supplied directly or pointing at the temp file). The
        # previous `else: yield` default-credentials branch was unreachable dead
        # code and has been removed.
        with patch_environ({CREDENTIALS: key_file_path}):
            yield
Example #10
0
def cheat_sheet(args):
    """Display cheat-sheet."""
    with contextlib.ExitStack() as stack:
        # Disable ANSI colors when stdout is not a color-capable terminal.
        if not is_terminal_support_colors():
            stack.enter_context(patch_environ({ANSI_COLORS_DISABLED: "1"}))
        header = "List of all commands:".upper()
        cprint(header, attrs=["bold", "underline"])
        print()
        display_commands_index()
Example #11
0
    def execute(self, context: 'Context') -> Optional[str]:
        """
        Fetch GKE cluster credentials with ``gcloud container clusters get-credentials``
        into a temporary kubeconfig, then delegate pod execution to the parent operator.

        :param context: Airflow task context forwarded to ``super().execute``.
        :return: whatever the parent operator's ``execute`` returns.
        :raises AirflowException: when no project id can be determined, or when an
            impersonation chain with more than one account is supplied.
        """
        # Prefer the explicitly passed project id; fall back to the one configured
        # on the Google Cloud connection.
        hook = GoogleBaseHook(gcp_conn_id=self.gcp_conn_id)
        self.project_id = self.project_id or hook.project_id

        if not self.project_id:
            raise AirflowException(
                "The project id must be passed either as "
                "keyword project_id parameter or as project_id extra "
                "in Google Cloud connection definition. Both are not set!")

        # Write config to a temp file and set the environment variable to point to it.
        # This is to avoid race conditions of reading/writing a single file
        with tempfile.NamedTemporaryFile() as conf_file, patch_environ(
            {KUBE_CONFIG_ENV_VAR:
             conf_file.name}), hook.provide_authorized_gcloud():
            # Attempt to get/update credentials
            # We call gcloud directly instead of using google-cloud-python api
            # because there is no way to write kubernetes config to a file, which is
            # required by KubernetesPodOperator.
            # The gcloud command looks at the env variable `KUBECONFIG` for where to save
            # the kubernetes config file.
            cmd = [
                "gcloud",
                "container",
                "clusters",
                "get-credentials",
                self.cluster_name,
                "--project",
                self.project_id,
            ]
            if self.impersonation_chain:
                # gcloud can impersonate only a single service account, so a list
                # is accepted only when it contains exactly one entry.
                if isinstance(self.impersonation_chain, str):
                    impersonation_account = self.impersonation_chain
                elif len(self.impersonation_chain) == 1:
                    impersonation_account = self.impersonation_chain[0]
                else:
                    raise AirflowException(
                        "Chained list of accounts is not supported, please specify only one service account"
                    )

                cmd.extend([
                    '--impersonate-service-account',
                    impersonation_account,
                ])
            # `location` is a region for regional clusters, a zone otherwise.
            if self.regional:
                cmd.append('--region')
            else:
                cmd.append('--zone')
            cmd.append(self.location)
            if self.use_internal_ip:
                cmd.append('--internal-ip')
            execute_in_subprocess(cmd)

            # Tell `KubernetesPodOperator` where the config file is located
            self.config_file = os.environ[KUBE_CONFIG_ENV_VAR]
            return super().execute(context)
Example #12
0
    def test_should_update_variable_and_restore_state_when_exit(self):
        """patch_environ sets values inside the block and restores prior state on exit."""
        initial = {"TEST_NOT_EXISTS": "BEFORE", "TEST_EXISTS": "BEFORE"}
        patched = {"TEST_NOT_EXISTS": "AFTER", "TEST_EXISTS": "AFTER"}
        with mock.patch.dict("os.environ", initial):
            # Make TEST_NOT_EXISTS genuinely absent before patching.
            del os.environ["TEST_NOT_EXISTS"]

            self.assertEqual("BEFORE", os.environ["TEST_EXISTS"])
            self.assertNotIn("TEST_NOT_EXISTS", os.environ)

            with process_utils.patch_environ(patched):
                self.assertEqual("AFTER", os.environ["TEST_NOT_EXISTS"])
                self.assertEqual("AFTER", os.environ["TEST_EXISTS"])

            # Overwritten variable restored, previously-absent one removed again.
            self.assertEqual("BEFORE", os.environ["TEST_EXISTS"])
            self.assertNotIn("TEST_NOT_EXISTS", os.environ)
Example #13
0
    def test_should_update_variable_and_restore_state_when_exit(self):
        """patch_environ sets values inside the block and restores prior state on exit."""
        initial = {"TEST_NOT_EXISTS": "BEFORE", "TEST_EXISTS": "BEFORE"}
        patched = {"TEST_NOT_EXISTS": "AFTER", "TEST_EXISTS": "AFTER"}
        with mock.patch.dict("os.environ", initial):
            # Make TEST_NOT_EXISTS genuinely absent before patching.
            del os.environ["TEST_NOT_EXISTS"]

            assert "BEFORE" == os.environ["TEST_EXISTS"]
            assert "TEST_NOT_EXISTS" not in os.environ

            with process_utils.patch_environ(patched):
                assert "AFTER" == os.environ["TEST_NOT_EXISTS"]
                assert "AFTER" == os.environ["TEST_EXISTS"]

            # Overwritten variable restored, previously-absent one removed again.
            assert "BEFORE" == os.environ["TEST_EXISTS"]
            assert "TEST_NOT_EXISTS" not in os.environ
Example #14
0
    def test_should_restore_state_when_exception(self):
        """patch_environ must restore the environment even when the body raises."""
        with mock.patch.dict("os.environ", {"TEST_NOT_EXISTS": "BEFORE", "TEST_EXISTS": "BEFORE"}):
            del os.environ["TEST_NOT_EXISTS"]

            self.assertEqual("BEFORE", os.environ["TEST_EXISTS"])
            self.assertNotIn("TEST_NOT_EXISTS", os.environ)

            with suppress(AirflowException):
                with process_utils.patch_environ({"TEST_NOT_EXISTS": "AFTER", "TEST_EXISTS": "AFTER"}):
                    self.assertEqual("AFTER", os.environ["TEST_NOT_EXISTS"])
                    self.assertEqual("AFTER", os.environ["TEST_EXISTS"])
                    # Typo fixed: "excepiton" -> "exception" (matches the sibling test).
                    raise AirflowException("Unknown exception")

            self.assertEqual("BEFORE", os.environ["TEST_EXISTS"])
            self.assertNotIn("TEST_NOT_EXISTS", os.environ)
Example #15
0
    def test_should_restore_state_when_exception(self):
        """patch_environ restores the environment even when the body raises."""
        initial = {"TEST_NOT_EXISTS": "BEFORE", "TEST_EXISTS": "BEFORE"}
        patched = {"TEST_NOT_EXISTS": "AFTER", "TEST_EXISTS": "AFTER"}
        with mock.patch.dict("os.environ", initial):
            # Make TEST_NOT_EXISTS genuinely absent before patching.
            del os.environ["TEST_NOT_EXISTS"]

            assert "BEFORE" == os.environ["TEST_EXISTS"]
            assert "TEST_NOT_EXISTS" not in os.environ

            with suppress(AirflowException):
                with process_utils.patch_environ(patched):
                    assert "AFTER" == os.environ["TEST_NOT_EXISTS"]
                    assert "AFTER" == os.environ["TEST_EXISTS"]
                    raise AirflowException("Unknown exception")

            # Despite the exception, both variables are back to their prior state.
            assert "BEFORE" == os.environ["TEST_EXISTS"]
            assert "TEST_NOT_EXISTS" not in os.environ
Example #16
0
def provide_wasb_default_connection(key_file_path: str):
    """
    Context manager to provide a temporary value for wasb_default connection.

    :param key_file_path: Path to file with wasb_default credentials .json file.
    :type key_file_path: str
    """
    if not key_file_path.endswith(".json"):
        raise AirflowException("Use a JSON key file.")
    with open(key_file_path) as credentials_file:
        credentials = json.load(credentials_file)
    connection = Connection(
        conn_id=WASB_CONNECTION_ID,
        conn_type="wasb",
        host=credentials.get("host"),
        login=credentials.get("login"),
        password=credentials.get("password"),
        extra=json.dumps(credentials.get('extra')),
    )
    env_key = f"AIRFLOW_CONN_{connection.conn_id.upper()}"
    with patch_environ({env_key: connection.get_uri()}):
        yield
def provide_facebook_connection(key_file_path: str):
    """
    Context manager that provides a temporary value for the Facebook connection
    (``AIRFLOW_CONN_<FACEBOOK_CONNECTION_ID>``). It builds a new connection whose
    ``extra`` field carries the credentials loaded from the provided JSON key file.

    (Previous docstring was a copy-paste from the GCP helper and described scopes /
    project id that do not apply here.)

    :param key_file_path: Path to file with FACEBOOK credentials .json file.
    :type key_file_path: str
    :raises AirflowException: if the file is not a JSON file or required fields are missing.
    """
    if not key_file_path.endswith(".json"):
        raise AirflowException("Use a JSON key file.")
    with open(key_file_path) as credentials:
        creds = json.load(credentials)
    # Validate all required credential fields before building the connection.
    missing_keys = CONFIG_REQUIRED_FIELDS - creds.keys()
    if missing_keys:
        message = f"{missing_keys} fields are missing"
        raise AirflowException(message)
    conn = Connection(conn_id=FACEBOOK_CONNECTION_ID,
                      conn_type=CONNECTION_TYPE,
                      extra=json.dumps(creds))
    with patch_environ(
        {f"AIRFLOW_CONN_{conn.conn_id.upper()}": conn.get_uri()}):
        yield
Example #18
0
def provide_leveldb_connection():
    """Context manager that provides a temporary value of AIRFLOW_CONN_LEVELDB_DEFAULT connection"""
    connection = Connection(conn_id=LEVELDB_CONNECTION_ID, conn_type=CONNECTION_TYPE)
    env_key = f"AIRFLOW_CONN_{connection.conn_id.upper()}"
    with patch_environ({env_key: connection.get_uri()}):
        yield