    def test_should_respond_200_with_task_state_in_removed(self, session):
        self.create_task_instances(session, task_instances=[{"state": State.REMOVED}], update_extras=True)
        response = self.client.get(
            "/api/v1/dags/example_python_operator/dagRuns/TEST_DAG_RUN_ID/taskInstances/print_the_context",
            environ_overrides={"REMOTE_USER": "******"},
        )
        assert response.status_code == 200
        assert response.json == {
            "dag_id": "example_python_operator",
            "duration": 10000.0,
            "end_date": "2020-01-03T00:00:00+00:00",
            "execution_date": "2020-01-01T00:00:00+00:00",
            "executor_config": "{}",
            "hostname": "",
            "max_tries": 0,
            "operator": "PythonOperator",
            "pid": 100,
            "pool": "default_pool",
            "pool_slots": 1,
            "priority_weight": 6,
            "queue": "default_queue",
            "queued_when": None,
            "sla_miss": None,
            "start_date": "2020-01-02T00:00:00+00:00",
            "state": "removed",
            "task_id": "print_the_context",
            "try_number": 0,
            "unixname": getuser(),
        }
    def test_start_and_terminate_run_as_user(self):
        local_task_job = mock.Mock()
        local_task_job.task_instance = mock.MagicMock()
        local_task_job.task_instance.run_as_user = getuser()
        local_task_job.task_instance.command_as_list.return_value = [
            'airflow',
            'tasks',
            'test',
            'test_on_kill',
            'task1',
            '2016-01-01',
        ]

        runner = StandardTaskRunner(local_task_job)

        runner.start()
        time.sleep(0.5)

        pgid = os.getpgid(runner.process.pid)
        assert pgid > 0
        assert pgid != os.getpgid(0), "Task should be in a different process group to us"

        processes = list(self._procs_in_pgroup(pgid))

        runner.terminate()

        for process in processes:
            assert not psutil.pid_exists(process.pid), f"{process} is still alive"

        assert runner.return_code() is not None
    def test_task_instance_schema_without_sla(self, session):
        ti = TI(task=self.task, **self.default_ti_init)
        for key, value in self.default_ti_extras.items():
            setattr(ti, key, value)
        session.add(ti)
        session.commit()
        serialized_ti = task_instance_schema.dump((ti, None))
        expected_json = {
            "dag_id": "TEST_DAG_ID",
            "duration": 10000.0,
            "end_date": "2020-01-03T00:00:00+00:00",
            "execution_date": "2020-01-01T00:00:00+00:00",
            "executor_config": "{}",
            "hostname": "",
            "max_tries": 0,
            "operator": "DummyOperator",
            "pid": 100,
            "pool": "default_pool",
            "pool_slots": 1,
            "priority_weight": 1,
            "queue": "default_queue",
            "queued_when": None,
            "sla_miss": None,
            "start_date": "2020-01-02T00:00:00+00:00",
            "state": "running",
            "task_id": "TEST_TASK_ID",
            "try_number": 0,
            "unixname": getuser(),
        }
        assert serialized_ti == expected_json
Example 4
    def __init__(self, local_task_job):
        # Pass task instance context into log handlers to setup the logger.
        super().__init__(local_task_job.task_instance)
        self._task_instance = local_task_job.task_instance

        popen_prepend = []
        if self._task_instance.run_as_user:
            self.run_as_user = self._task_instance.run_as_user
        else:
            try:
                self.run_as_user = conf.get('core', 'default_impersonation')
            except AirflowConfigException:
                self.run_as_user = None

        self._error_file = NamedTemporaryFile(delete=True)

        # Add sudo commands to change user if we need to. Needed to handle SubDagOperator
        # case using a SequentialExecutor.
        self.log.debug("Planning to run as the %s user", self.run_as_user)
        if self.run_as_user and (self.run_as_user != getuser()):
            # We want to include any environment variables now, as we won't
            # want to have to specify them in the sudo call - they would show
            # up in `ps` that way! And run commands now, as the other user
            # might not be able to run the cmds to get credentials
            cfg_path = tmp_configuration_copy(chmod=0o600, include_env=True, include_cmds=True)

            # Give ownership of file to user; only they can read and write
            subprocess.check_call(
                ['sudo', 'chown', self.run_as_user, cfg_path, self._error_file.name], close_fds=True
            )

            # propagate PYTHONPATH environment variable
            pythonpath_value = os.environ.get(PYTHONPATH_VAR, '')
            popen_prepend = ['sudo', '-E', '-H', '-u', self.run_as_user]

            if pythonpath_value:
                popen_prepend.append(f'{PYTHONPATH_VAR}={pythonpath_value}')

        else:
            # Always provide a copy of the configuration file settings. Since
            # we are running as the same user, and can pass through environment
            # variables then we don't need to include those in the config copy
            # - the runner can read/execute those values as it needs
            cfg_path = tmp_configuration_copy(chmod=0o600, include_env=False, include_cmds=False)

        self._cfg_path = cfg_path
        self._command = (
            popen_prepend
            + self._task_instance.command_as_list(
                raw=True,
                pickle_id=local_task_job.pickle_id,
                mark_success=local_task_job.mark_success,
                job_id=local_task_job.id,
                pool=local_task_job.pool,
                cfg_path=cfg_path,
            )
            + ["--error-file", self._error_file.name]
        )
        self.process = None
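For illustration, when run_as_user differs from the current user, the assembled self._command has roughly the following shape; every concrete value below is invented, and the exact subcommand and flags come from command_as_list():

# Illustrative shape only -- user, paths, dag/task ids and date are made up:
command = [
    'sudo', '-E', '-H', '-u', 'other_user',                        # popen_prepend
    'PYTHONPATH=/opt/airflow',                                     # only when PYTHONPATH was set
    'airflow', 'tasks', 'run', 'my_dag', 'my_task', '2020-01-01',  # from command_as_list(...)
    '--error-file', '/tmp/tmpabc123',                              # self._error_file.name
]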
Example 5
def _build_metrics(func_name, namespace):
    """
    Builds metrics dict from function args
    It assumes that function arguments is from airflow.bin.cli module's function
    and has Namespace instance where it optionally contains "dag_id", "task_id",
    and "execution_date".

    :param func_name: name of function
    :param namespace: Namespace instance from argparse
    :return: dict with metrics
    """
    from airflow.models import Log

    sub_commands_to_check = {'users', 'connections'}
    sensitive_fields = {'-p', '--password', '--conn-password'}
    full_command = list(sys.argv)
    sub_command = full_command[1] if len(full_command) > 1 else None
    if sub_command in sub_commands_to_check:
        for idx, command in enumerate(full_command):
            if command in sensitive_fields:
                # For cases when password is passed as "--password xyz" (with space between key and value)
                full_command[idx + 1] = "*" * 8
            else:
                # For cases when password is passed as "--password=xyz" (with '=' between key and value)
                for sensitive_field in sensitive_fields:
                    if command.startswith(f'{sensitive_field}='):
                        full_command[idx] = f'{sensitive_field}={"*" * 8}'

    metrics = {
        'sub_command': func_name,
        'start_datetime': datetime.utcnow(),
        'full_command': f'{full_command}',
        'user': getuser(),
    }

    if not isinstance(namespace, Namespace):
        raise ValueError(
            "namespace argument should be an argparse.Namespace instance, "
            f"but is {type(namespace)}"
        )
    tmp_dic = vars(namespace)
    metrics['dag_id'] = tmp_dic.get('dag_id')
    metrics['task_id'] = tmp_dic.get('task_id')
    metrics['execution_date'] = tmp_dic.get('execution_date')
    metrics['host_name'] = socket.gethostname()

    extra = json.dumps({k: metrics[k] for k in ('host_name', 'full_command')})
    log = Log(
        event=f'cli_{func_name}',
        task_instance=None,
        owner=metrics['user'],
        extra=extra,
        task_id=metrics.get('task_id'),
        dag_id=metrics.get('dag_id'),
        execution_date=metrics.get('execution_date'),
    )
    metrics['log'] = log
    return metrics
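A standalone sketch of the masking step above; it runs without Airflow (the argv values are invented) and covers both the "--password xyz" and "--password=xyz" forms:

sensitive_fields = {'-p', '--password', '--conn-password'}
full_command = ['airflow', 'users', 'create', '--username', 'admin', '--password', 'hunter2']
for idx, command in enumerate(full_command):
    if command in sensitive_fields:
        # "--password xyz": mask the following token
        full_command[idx + 1] = "*" * 8
    else:
        # "--password=xyz": mask the value part in place
        for sensitive_field in sensitive_fields:
            if command.startswith(f'{sensitive_field}='):
                full_command[idx] = f'{sensitive_field}={"*" * 8}'
print(full_command)
# ['airflow', 'users', 'create', '--username', 'admin', '--password', '********']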
Example 6
def _build_metrics(func_name, args, kwargs):
    """
    Builds metrics dict from function args
    If the first item in args is a Namespace instance, it assumes that it
    optionally contains "dag_id", "task_id", and "execution_date".

    :param func_name: name of function
    :param args: Arguments from wrapped function, possibly including the Namespace instance from
                 argparse as the first argument
    :param kwargs: Keyword arguments from wrapped function
    :return: dict with metrics
    """
    from airflow.models import Log

    sub_commands_to_check = {'users', 'connections'}
    sensitive_fields = {'-p', '--password', '--conn-password'}
    full_command = list(sys.argv)
    sub_command = full_command[1] if len(full_command) > 1 else None
    if sub_command in sub_commands_to_check:
        for idx, command in enumerate(full_command):
            if command in sensitive_fields:
                # For cases when password is passed as "--password xyz" (with space between key and value)
                full_command[idx + 1] = "*" * 8
            else:
                # For cases when password is passed as "--password=xyz" (with '=' between key and value)
                for sensitive_field in sensitive_fields:
                    if command.startswith(f'{sensitive_field}='):
                        full_command[idx] = f'{sensitive_field}={"*" * 8}'

    metrics = {
        'sub_command': func_name,
        'start_datetime': datetime.utcnow(),
        'full_command': f'{full_command}',
        'user': getuser(),
    }

    tmp_dic = vars(args[0]) if (args and isinstance(args[0], Namespace)) else kwargs
    metrics['dag_id'] = tmp_dic.get('dag_id')
    metrics['task_id'] = tmp_dic.get('task_id')
    metrics['execution_date'] = tmp_dic.get('execution_date')
    metrics['host_name'] = socket.gethostname()

    extra = json.dumps({k: metrics[k] for k in ('host_name', 'full_command')})
    log = Log(
        event=f'cli_{func_name}',
        task_instance=None,
        owner=metrics['user'],
        extra=extra,
        task_id=metrics.get('task_id'),
        dag_id=metrics.get('dag_id'),
        execution_date=metrics.get('execution_date'),
    )
    metrics['log'] = log
    return metrics
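The main difference from the previous variant is the argument dispatch: fields come from the Namespace when one is passed positionally, and from kwargs otherwise. A minimal standalone illustration:

from argparse import Namespace

def pick_fields(args, kwargs):
    # Same dispatch as above: prefer a positional Namespace, fall back to kwargs.
    return vars(args[0]) if (args and isinstance(args[0], Namespace)) else kwargs

print(pick_fields((Namespace(dag_id='d1', task_id='t1'),), {}))  # {'dag_id': 'd1', 'task_id': 't1'}
print(pick_fields((), {'dag_id': 'd2'}))                         # {'dag_id': 'd2'}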
    def __init__(self, local_task_job):
        super().__init__(local_task_job)
        self.process = None
        self._finished_running = False
        self._cpu_shares = None
        self._mem_mb_limit = None
        self.mem_cgroup_name = None
        self.cpu_cgroup_name = None
        self._created_cpu_cgroup = False
        self._created_mem_cgroup = False
        self._cur_user = getuser()
Example 8
    def __init__(self, executor=None, heartrate=None, *args, **kwargs):
        self.hostname = get_hostname()
        self.executor = executor or ExecutorLoader.get_default_executor()
        self.executor_class = self.executor.__class__.__name__
        self.start_date = timezone.utcnow()
        self.latest_heartbeat = timezone.utcnow()
        if heartrate is not None:
            self.heartrate = heartrate
        self.unixname = getuser()
        self.max_tis_per_query = conf.getint('scheduler', 'max_tis_per_query')
        super().__init__(*args, **kwargs)
Example 9
    def __init__(self, executor=None, heartrate=None, *args, **kwargs):
        self.hostname = get_hostname()
        if executor:
            self.executor = executor
            self.executor_class = executor.__class__.__name__
        else:
            self.executor_class = conf.get('core', 'EXECUTOR')
        self.start_date = timezone.utcnow()
        self.latest_heartbeat = timezone.utcnow()
        if heartrate is not None:
            self.heartrate = heartrate
        self.unixname = getuser()
        self.max_tis_per_query: int = conf.getint('scheduler', 'max_tis_per_query')
        super().__init__(*args, **kwargs)
    def test_should_respond_200_task_instance_with_sla(self, session):
        self.create_task_instances(session)
        sla_miss = SlaMiss(
            task_id="print_the_context",
            dag_id="example_python_operator",
            execution_date=self.default_time,
            timestamp=self.default_time,
        )
        session.add(sla_miss)
        session.commit()
        response = self.client.get(
            "/api/v1/dags/example_python_operator/dagRuns/TEST_DAG_RUN_ID/taskInstances/print_the_context",
            environ_overrides={"REMOTE_USER": "******"},
        )
        assert response.status_code == 200

        assert response.json == {
            "dag_id": "example_python_operator",
            "duration": 10000.0,
            "end_date": "2020-01-03T00:00:00+00:00",
            "execution_date": "2020-01-01T00:00:00+00:00",
            "executor_config": "{}",
            "hostname": "",
            "max_tries": 0,
            "operator": "PythonOperator",
            "pid": 100,
            "pool": "default_pool",
            "pool_slots": 1,
            "priority_weight": 6,
            "queue": "default_queue",
            "queued_when": None,
            "sla_miss": {
                "dag_id": "example_python_operator",
                "description": None,
                "email_sent": False,
                "execution_date": "2020-01-01T00:00:00+00:00",
                "notification_sent": False,
                "task_id": "print_the_context",
                "timestamp": "2020-01-01T00:00:00+00:00",
            },
            "start_date": "2020-01-02T00:00:00+00:00",
            "state": "running",
            "task_id": "print_the_context",
            "try_number": 0,
            "unixname": getuser(),
        }
    def test_early_reap_exit(self, caplog):
        """
        Tests that when a child process running a task is killed externally
        (e.g. by an OOM error, which we fake here), then we get return code
        -9 and a log message.
        """
        # Set up mock task
        local_task_job = mock.Mock()
        local_task_job.task_instance = mock.MagicMock()
        local_task_job.task_instance.run_as_user = getuser()
        local_task_job.task_instance.command_as_list.return_value = [
            'airflow',
            'tasks',
            'test',
            'test_on_kill',
            'task1',
            '2016-01-01',
        ]

        # Kick off the runner
        runner = StandardTaskRunner(local_task_job)
        runner.start()
        time.sleep(0.2)

        # Kill the child process externally from the runner
        # Note that we have to do this from ANOTHER process, as if we just
        # call os.kill here we're doing it from the parent process and it
        # won't be the same as an external kill in terms of OS tracking.
        pgid = os.getpgid(runner.process.pid)
        os.system(f"kill -s KILL {pgid}")
        time.sleep(0.2)

        runner.terminate()

        assert runner.return_code() == -9
        assert "running out of memory" in caplog.text
Example 12
    def __init__(
        self,
        ssh_conn_id: Optional[str] = None,
        remote_host: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
        key_file: Optional[str] = None,
        port: Optional[int] = None,
        timeout: Optional[int] = None,
        conn_timeout: Optional[int] = None,
        keepalive_interval: int = 30,
    ) -> None:
        super().__init__()
        self.ssh_conn_id = ssh_conn_id
        self.remote_host = remote_host
        self.username = username
        self.password = password
        self.key_file = key_file
        self.pkey = None
        self.port = port
        self.timeout = timeout
        self.conn_timeout = conn_timeout
        self.keepalive_interval = keepalive_interval

        # Default values, overridable from Connection
        self.compress = True
        self.no_host_key_check = True
        self.allow_host_key_change = False
        self.host_proxy = None
        self.host_key = None
        self.look_for_keys = True

        # Placeholder for deprecated __enter__
        self.client = None

        # Use connection to override defaults
        if self.ssh_conn_id is not None:
            conn = self.get_connection(self.ssh_conn_id)
            if self.username is None:
                self.username = conn.login
            if self.password is None:
                self.password = conn.password
            if self.remote_host is None:
                self.remote_host = conn.host
            if self.port is None:
                self.port = conn.port

            if conn.extra is not None:
                extra_options = conn.extra_dejson
                if "key_file" in extra_options and self.key_file is None:
                    self.key_file = extra_options.get("key_file")

                private_key = extra_options.get('private_key')
                private_key_passphrase = extra_options.get('private_key_passphrase')
                if private_key:
                    self.pkey = self._pkey_from_private_key(private_key, passphrase=private_key_passphrase)

                if "timeout" in extra_options:
                    warnings.warn(
                        'Extra option `timeout` is deprecated. '
                        'Please use `conn_timeout` instead. '
                        'The old option `timeout` will be removed in a future version.',
                        DeprecationWarning,
                        stacklevel=2,
                    )
                    self.timeout = int(extra_options['timeout'])

                if "conn_timeout" in extra_options and self.conn_timeout is None:
                    self.conn_timeout = int(extra_options['conn_timeout'])

                if "compress" in extra_options and str(extra_options["compress"]).lower() == 'false':
                    self.compress = False

                host_key = extra_options.get("host_key")
                no_host_key_check = extra_options.get("no_host_key_check")

                if no_host_key_check is not None:
                    no_host_key_check = str(no_host_key_check).lower() == "true"
                    if host_key is not None and no_host_key_check:
                        raise ValueError("Must check host key when provided")

                    self.no_host_key_check = no_host_key_check

                if (
                    "allow_host_key_change" in extra_options
                    and str(extra_options["allow_host_key_change"]).lower() == 'true'
                ):
                    self.allow_host_key_change = True

                if (
                    "look_for_keys" in extra_options
                    and str(extra_options["look_for_keys"]).lower() == 'false'
                ):
                    self.look_for_keys = False

                if host_key is not None:
                    if host_key.startswith("ssh-"):
                        key_type, host_key = host_key.split(None)[:2]
                        key_constructor = self._host_key_mappings[key_type[4:]]
                    else:
                        key_constructor = paramiko.RSAKey
                    decoded_host_key = decodebytes(host_key.encode('utf-8'))
                    self.host_key = key_constructor(data=decoded_host_key)
                    self.no_host_key_check = False

        if self.timeout:
            warnings.warn(
                'Parameter `timeout` is deprecated. '
                'Please use `conn_timeout` instead. '
                'The old option `timeout` will be removed in a future version.',
                DeprecationWarning,
                stacklevel=1,
            )

        if self.conn_timeout is None:
            self.conn_timeout = self.timeout if self.timeout else TIMEOUT_DEFAULT

        if self.pkey and self.key_file:
            raise AirflowException(
                "Params key_file and private_key both provided.  Must provide no more than one."
            )

        if not self.remote_host:
            raise AirflowException("Missing required param: remote_host")

        # Auto detecting username values from system
        if not self.username:
            self.log.debug(
                "username to ssh to host: %s is not specified for connection id"
                " %s. Using system's default provided by getpass.getuser()",
                self.remote_host,
                self.ssh_conn_id,
            )
            self.username = getuser()

        user_ssh_config_filename = os.path.expanduser('~/.ssh/config')
        if os.path.isfile(user_ssh_config_filename):
            ssh_conf = paramiko.SSHConfig()
            with open(user_ssh_config_filename) as config_fd:
                ssh_conf.parse(config_fd)
            host_info = ssh_conf.lookup(self.remote_host)
            if host_info and host_info.get('proxycommand'):
                self.host_proxy = paramiko.ProxyCommand(host_info.get('proxycommand'))

            if not (self.password or self.key_file):
                if host_info and host_info.get('identityfile'):
                    self.key_file = host_info.get('identityfile')[0]

        self.port = self.port or SSH_PORT
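A minimal usage sketch, assuming the providers-package import path and an existing Connection named "ssh_default" (both are assumptions, not part of the snippet):

from airflow.providers.ssh.hooks.ssh import SSHHook

hook = SSHHook(ssh_conn_id='ssh_default', conn_timeout=10)
client = hook.get_conn()  # a paramiko.SSHClient configured by the logic above
_, stdout, _ = client.exec_command('whoami')
print(stdout.read().decode())  # the username, possibly the getuser() fallback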
Example 13
    def __init__(self):
        home_path = os.path.expanduser("~")
        username = getuser()
        self._path_replacements = {home_path: "${HOME}", username: "******"}
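Only __init__ appears in the snippet; a plausible consumer would apply _path_replacements with str.replace. A hypothetical sketch (the class name and redact() helper are invented for illustration):

import os
from getpass import getuser

class PathAnonymizer:  # hypothetical name
    def __init__(self):
        home_path = os.path.expanduser("~")
        username = getuser()
        self._path_replacements = {home_path: "${HOME}", username: "******"}

    def redact(self, text: str) -> str:  # hypothetical helper, not in the snippet
        for needle, replacement in self._path_replacements.items():
            text = text.replace(needle, replacement)
        return text

print(PathAnonymizer().redact(f"log dir: {os.path.expanduser('~')}/logs"))  # log dir: ${HOME}/logs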
Example 14
    def get_conn(self):
        if self.client:
            return self.client

        self.log.debug('Creating WinRM client for conn_id: %s', self.ssh_conn_id)
        if self.ssh_conn_id is not None:
            conn = self.get_connection(self.ssh_conn_id)

            if self.username is None:
                self.username = conn.login
            if self.password is None:
                self.password = conn.password
            if self.remote_host is None:
                self.remote_host = conn.host

            if conn.extra is not None:
                extra_options = conn.extra_dejson

                if "endpoint" in extra_options:
                    self.endpoint = str(extra_options["endpoint"])
                if "remote_port" in extra_options:
                    self.remote_port = int(extra_options["remote_port"])
                if "transport" in extra_options:
                    self.transport = str(extra_options["transport"])
                if "service" in extra_options:
                    self.service = str(extra_options["service"])
                if "keytab" in extra_options:
                    self.keytab = str(extra_options["keytab"])
                if "ca_trust_path" in extra_options:
                    self.ca_trust_path = str(extra_options["ca_trust_path"])
                if "cert_pem" in extra_options:
                    self.cert_pem = str(extra_options["cert_pem"])
                if "cert_key_pem" in extra_options:
                    self.cert_key_pem = str(extra_options["cert_key_pem"])
                if "server_cert_validation" in extra_options:
                    self.server_cert_validation = str(extra_options["server_cert_validation"])
                if "kerberos_delegation" in extra_options:
                    self.kerberos_delegation = str(extra_options["kerberos_delegation"]).lower() == 'true'
                if "read_timeout_sec" in extra_options:
                    self.read_timeout_sec = int(extra_options["read_timeout_sec"])
                if "operation_timeout_sec" in extra_options:
                    self.operation_timeout_sec = int(extra_options["operation_timeout_sec"])
                if "kerberos_hostname_override" in extra_options:
                    self.kerberos_hostname_override = str(extra_options["kerberos_hostname_override"])
                if "message_encryption" in extra_options:
                    self.message_encryption = str(extra_options["message_encryption"])
                if "credssp_disable_tlsv1_2" in extra_options:
                    self.credssp_disable_tlsv1_2 = (
                        str(extra_options["credssp_disable_tlsv1_2"]).lower() == 'true'
                    )
                if "send_cbt" in extra_options:
                    self.send_cbt = str(extra_options["send_cbt"]).lower() == 'true'

        if not self.remote_host:
            raise AirflowException("Missing required param: remote_host")

        # Auto detecting username values from system
        if not self.username:
            self.log.debug(
                "username to WinRM to host: %s is not specified for connection id"
                " %s. Using system's default provided by getpass.getuser()",
                self.remote_host,
                self.ssh_conn_id,
            )
            self.username = getuser()

        # If endpoint is not set, then build a standard wsman endpoint from host and port.
        if not self.endpoint:
            self.endpoint = f'http://{self.remote_host}:{self.remote_port}/wsman'

        try:
            if self.password and self.password.strip():
                self.winrm_protocol = Protocol(
                    endpoint=self.endpoint,
                    transport=self.transport,
                    username=self.username,
                    password=self.password,
                    service=self.service,
                    keytab=self.keytab,
                    ca_trust_path=self.ca_trust_path,
                    cert_pem=self.cert_pem,
                    cert_key_pem=self.cert_key_pem,
                    server_cert_validation=self.server_cert_validation,
                    kerberos_delegation=self.kerberos_delegation,
                    read_timeout_sec=self.read_timeout_sec,
                    operation_timeout_sec=self.operation_timeout_sec,
                    kerberos_hostname_override=self.kerberos_hostname_override,
                    message_encryption=self.message_encryption,
                    credssp_disable_tlsv1_2=self.credssp_disable_tlsv1_2,
                    send_cbt=self.send_cbt,
                )

            self.log.info("Establishing WinRM connection to host: %s", self.remote_host)
            self.client = self.winrm_protocol.open_shell()

        except Exception as error:
            error_msg = f"Error connecting to host: {self.remote_host}, error: {error}"
            self.log.error(error_msg)
            raise AirflowException(error_msg)

        return self.client
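A minimal usage sketch, assuming the Microsoft WinRM provider package and a Connection named "winrm_default"; get_conn() returns an open shell id that the underlying pywinrm Protocol operates on:

from airflow.providers.microsoft.winrm.hooks.winrm import WinRMHook

hook = WinRMHook(ssh_conn_id='winrm_default')
shell_id = hook.get_conn()
command_id = hook.winrm_protocol.run_command(shell_id, 'ipconfig')
stdout, stderr, return_code = hook.winrm_protocol.get_command_output(shell_id, command_id)
hook.winrm_protocol.cleanup_command(shell_id, command_id)
hook.winrm_protocol.close_shell(shell_id)
print(return_code, stdout.decode())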
Example 15
    def __init__(  # pylint: disable=too-many-statements
        self,
        ssh_conn_id: Optional[str] = None,
        remote_host: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
        key_file: Optional[str] = None,
        port: Optional[int] = None,
        timeout: int = 10,
        keepalive_interval: int = 30,
    ) -> None:
        super().__init__()
        self.ssh_conn_id = ssh_conn_id
        self.remote_host = remote_host
        self.username = username
        self.password = password
        self.key_file = key_file
        self.pkey = None
        self.port = port
        self.timeout = timeout
        self.keepalive_interval = keepalive_interval

        # Default values, overridable from Connection
        self.compress = True
        self.no_host_key_check = True
        self.allow_host_key_change = False
        self.host_proxy = None
        self.host_key = None
        self.look_for_keys = True

        # Placeholder for deprecated __enter__
        self.client = None

        # Use connection to override defaults
        if self.ssh_conn_id is not None:
            conn = self.get_connection(self.ssh_conn_id)
            if self.username is None:
                self.username = conn.login
            if self.password is None:
                self.password = conn.password
            if self.remote_host is None:
                self.remote_host = conn.host
            if self.port is None:
                self.port = conn.port
            if conn.extra is not None:
                extra_options = conn.extra_dejson
                if "key_file" in extra_options and self.key_file is None:
                    self.key_file = extra_options.get("key_file")

                private_key = extra_options.get('private_key')
                private_key_passphrase = extra_options.get('private_key_passphrase')
                if private_key:
                    self.pkey = self._pkey_from_private_key(private_key, passphrase=private_key_passphrase)
                if "timeout" in extra_options:
                    self.timeout = int(extra_options["timeout"])

                if "compress" in extra_options and str(extra_options["compress"]).lower() == 'false':
                    self.compress = False
                if "no_host_key_check" in extra_options and str(extra_options["no_host_key_check"]).lower() == 'false':
                    self.no_host_key_check = False
                if "allow_host_key_change" in extra_options and str(extra_options["allow_host_key_change"]).lower() == 'true':
                    self.allow_host_key_change = True
                if "look_for_keys" in extra_options and str(extra_options["look_for_keys"]).lower() == 'false':
                    self.look_for_keys = False
                if "host_key" in extra_options and self.no_host_key_check is False:
                    decoded_host_key = decodebytes(extra_options["host_key"].encode('utf-8'))
                    self.host_key = paramiko.RSAKey(data=decoded_host_key)
        if self.pkey and self.key_file:
            raise AirflowException(
                "Params key_file and private_key both provided.  Must provide no more than one."
            )

        if not self.remote_host:
            raise AirflowException("Missing required param: remote_host")

        # Auto detecting username values from system
        if not self.username:
            self.log.debug(
                "username to ssh to host: %s is not specified for connection id"
                " %s. Using system's default provided by getpass.getuser()",
                self.remote_host,
                self.ssh_conn_id,
            )
            self.username = getuser()

        user_ssh_config_filename = os.path.expanduser('~/.ssh/config')
        if os.path.isfile(user_ssh_config_filename):
            ssh_conf = paramiko.SSHConfig()
            with open(user_ssh_config_filename) as config_fd:
                ssh_conf.parse(config_fd)
            host_info = ssh_conf.lookup(self.remote_host)
            if host_info and host_info.get('proxycommand'):
                self.host_proxy = paramiko.ProxyCommand(host_info.get('proxycommand'))

            if not (self.password or self.key_file):
                if host_info and host_info.get('identityfile'):
                    self.key_file = host_info.get('identityfile')[0]

        self.port = self.port or SSH_PORT