Example #1
0
    def _prepare_cli_cmd(self):
        """
        This function creates the command list from available information
        """
        conn = self.conn
        hive_bin = 'hive'
        cmd_extra = []

        if self.use_beeline:
            hive_bin = 'beeline'
            jdbc_url = "jdbc:hive2://{host}:{port}/{schema}".format(
                host=conn.host, port=conn.port, schema=conn.schema)
            if configuration.conf.get('core', 'security') == 'kerberos':
                template = conn.extra_dejson.get(
                    'principal', "hive/[email protected]")
                if "_HOST" in template:
                    template = utils.replace_hostname_pattern(
                        utils.get_components(template))

                proxy_user = ""  # noqa
                if conn.extra_dejson.get('proxy_user') == "login" and conn.login:
                    proxy_user = "hive.server2.proxy.user={0}".format(conn.login)
                elif conn.extra_dejson.get('proxy_user') == "owner" and self.run_as:
                    proxy_user = "hive.server2.proxy.user={0}".format(self.run_as)

                jdbc_url += ";principal={template};{proxy_user}".format(
                    template=template, proxy_user=proxy_user)
            elif self.auth:
                jdbc_url += ";auth=" + self.auth

            jdbc_url = '"{}"'.format(jdbc_url)

            cmd_extra += ['-u', jdbc_url]
            if conn.login:
                cmd_extra += ['-n', conn.login]
            if conn.password:
                cmd_extra += ['-p', conn.password]

        hive_params_list = self.hive_cli_params.split()

        return [hive_bin] + cmd_extra + hive_params_list
Example #2
0
    def run_cli(self, hql, schema=None, verbose=True):
        """
        Execute an hql statement through the hive command-line client.

        The statement is written to a temporary file and handed to the
        ``hive`` (or ``beeline``) binary via ``-f``; the combined
        stdout/stderr is returned as a string. Raises AirflowException
        when the CLI exits non-zero.

        >>> hh = HiveCliHook()
        >>> result = hh.run_cli("USE airflow;")
        >>> ("OK" in result)
        True
        """
        conn = self.conn
        schema = schema or conn.schema
        if schema:
            hql = "USE {schema};\n{hql}".format(schema=schema, hql=hql)

        with TemporaryDirectory(prefix='airflow_hiveop_') as tmp_dir:
            with NamedTemporaryFile(dir=tmp_dir) as f:
                # The temp file is opened in binary mode; encode explicitly.
                f.write(hql.encode('UTF-8'))
                f.flush()
                fname = f.name
                hive_bin = 'hive'
                cmd_extra = []

                if self.use_beeline:
                    hive_bin = 'beeline'
                    jdbc_url = "jdbc:hive2://{host}:{port}/{schema}".format(
                        host=conn.host, port=conn.port, schema=conn.schema)

                    if configuration.get('core', 'security') == 'kerberos':
                        principal = conn.extra_dejson.get(
                            'principal', "hive/[email protected]")
                        if "_HOST" in principal:
                            principal = utils.replace_hostname_pattern(
                                utils.get_components(principal))

                        # Resolve the effective impersonation setting, if any.
                        proxy_setting = conn.extra_dejson.get('proxy_user')
                        proxy_user = ""  # noqa
                        if proxy_setting == "login" and conn.login:
                            proxy_user = "hive.server2.proxy.user={0}".format(
                                conn.login)
                        elif proxy_setting == "owner" and self.run_as:
                            proxy_user = "hive.server2.proxy.user={0}".format(
                                self.run_as)

                        jdbc_url += ";principal={0};{1}".format(
                            principal, proxy_user)
                    elif self.auth:
                        jdbc_url += ";auth=" + self.auth

                    cmd_extra += ['-u', jdbc_url]
                    if conn.login:
                        cmd_extra += ['-n', conn.login]
                    if conn.password:
                        cmd_extra += ['-p', conn.password]

                hive_cmd = [hive_bin, '-f', fname] + cmd_extra

                if self.hive_cli_params:
                    hive_cmd.extend(self.hive_cli_params.split())
                if verbose:
                    logging.info(" ".join(hive_cmd))

                sp = subprocess.Popen(
                    hive_cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    cwd=tmp_dir)
                self.sp = sp

                # Stream output line by line; readline() yields b'' at EOF.
                chunks = []
                for raw in iter(sp.stdout.readline, b''):
                    decoded = raw.decode('UTF-8')
                    chunks.append(decoded)
                    if verbose:
                        logging.info(decoded.strip())
                stdout = ''.join(chunks)
                sp.wait()

                if sp.returncode:
                    raise AirflowException(stdout)

                return stdout
Example #3
0
    def run_cli(self, hql, schema=None, verbose=True):
        """
        Run an hql statement using the hive cli.

        The statement is written to a temporary file and executed via the
        ``hive`` (or ``beeline``) binary with ``-f``; combined stdout/stderr
        is returned as a string. Raises AirflowException on a non-zero exit.

        >>> hh = HiveCliHook()
        >>> result = hh.run_cli("USE airflow;")
        >>> ("OK" in result)
        True
        """
        conn = self.conn
        schema = schema or conn.schema
        if schema:
            hql = "USE {schema};\n{hql}".format(**locals())

        with TemporaryDirectory(prefix="airflow_hiveop_") as tmp_dir:
            with NamedTemporaryFile(dir=tmp_dir) as f:
                # NamedTemporaryFile defaults to binary mode, so the statement
                # must be encoded; writing str here fails on Python 3.
                f.write(hql.encode("UTF-8"))
                f.flush()
                fname = f.name
                hive_bin = "hive"
                cmd_extra = []

                if self.use_beeline:
                    hive_bin = "beeline"
                    if conf.get("core", "security") == "kerberos":
                        template = conn.extra_dejson.get("principal", "hive/[email protected]")
                        # NOTE(review): unlike the sibling implementations, the
                        # _HOST substitution here is unconditional — confirm
                        # replace_hostname_pattern tolerates plain principals.
                        template = utils.replace_hostname_pattern(utils.get_components(template))

                        proxy_user = ""
                        if conn.extra_dejson.get("proxy_user") == "login" and conn.login:
                            proxy_user = "hive.server2.proxy.user={0}".format(conn.login)
                        elif conn.extra_dejson.get("proxy_user") == "owner" and self.run_as:
                            proxy_user = "hive.server2.proxy.user={0}".format(self.run_as)

                        # ';' separates the principal from the proxy-user
                        # setting (previously the two were concatenated with
                        # no delimiter, producing an invalid JDBC URL).
                        jdbc_url = ("jdbc:hive2://" "{0}:{1}/{2}" ";principal={3};{4}").format(
                            conn.host, conn.port, conn.schema, template, proxy_user
                        )
                    else:
                        jdbc_url = ("jdbc:hive2://" "{0}:{1}/{2}" ";auth=noSasl").format(
                            conn.host, conn.port, conn.schema
                        )

                    cmd_extra += ["-u", jdbc_url]
                    if conn.login:
                        cmd_extra += ["-n", conn.login]
                    if conn.password:
                        cmd_extra += ["-p", conn.password]

                hive_cmd = [hive_bin, "-f", fname] + cmd_extra

                if self.hive_cli_params:
                    hive_params_list = self.hive_cli_params.split()
                    hive_cmd.extend(hive_params_list)
                if verbose:
                    logging.info(" ".join(hive_cmd))
                sp = subprocess.Popen(hive_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=tmp_dir)
                self.sp = sp
                stdout = ""
                # The pipe yields bytes; the old str sentinel ("") never
                # matched b"" at EOF, looping forever on Python 3. Use a
                # bytes sentinel and decode each line before accumulating.
                for line in iter(sp.stdout.readline, b""):
                    decoded = line.decode("UTF-8")
                    stdout += decoded
                    if verbose:
                        logging.info(decoded.strip())
                sp.wait()

                if sp.returncode:
                    raise AirflowException(stdout)

                return stdout