Example #1
0
    def execute(self, context) -> None:
        """Create the command, poll it until it is done and fail on a bad final status.

        The id of the created command is pushed to XCom under the key
        ``qbol_cmd_id`` and the task instance taken from ``context`` is kept on
        ``self.task_instance``. When ``self.kwargs['fetch_logs']`` is exactly
        ``True`` the command log is written to the task log after polling ends.

        :param context: task context; ``context['task_instance']`` must be present.
        :raises AirflowException: if the final command status is not ``'done'``.
        """
        parsed_args = self.cls.parse(self.create_cmd_args(context))
        self.cmd = self.cls.create(**parsed_args)

        # Keep a handle on the task instance and publish the command id through it.
        task_instance = context['task_instance']
        self.task_instance = task_instance
        task_instance.xcom_push(key='qbol_cmd_id', value=self.cmd.id)  # type: ignore[attr-defined]
        self.log.info(
            "Qubole command created with Id: %s and Status: %s",
            self.cmd.id,  # type: ignore[attr-defined]
            self.cmd.status,  # type: ignore[attr-defined]
        )

        # Re-fetch the command every poll interval until its status is reported as done.
        while True:
            if Command.is_done(self.cmd.status):  # type: ignore[attr-defined]
                break
            time.sleep(Qubole.poll_interval)
            self.cmd = self.cls.find(self.cmd.id)  # type: ignore[attr-defined]
            self.log.info(
                "Command Id: %s and Status: %s",
                self.cmd.id,  # type: ignore[attr-defined]
                self.cmd.status,  # type: ignore[attr-defined]
            )

        # Only the literal True enables log fetching; a missing key yields None here.
        if self.kwargs.get('fetch_logs') is True:
            self.log.info(
                "Logs for Command Id: %s \n%s",
                self.cmd.id,  # type: ignore[attr-defined]
                self.cmd.get_log(),  # type: ignore[attr-defined]
            )

        if self.cmd.status != 'done':  # type: ignore[attr-defined]
            failure = 'Command Id: {} failed with Status: {}'.format(
                self.cmd.id, self.cmd.status  # type: ignore[attr-defined]
            )
            raise AirflowException(failure)
Example #2
0
    def monitor_command(self, command, sql_stmt):
        """Poll a Qubole command until it finishes, errors out or runs too long.

        The command is re-fetched every 10 seconds. The accumulated polling
        time is compared against ``self.expected_runtime * 1.5``; exceeding
        that budget aborts the wait.

        :param command: command object whose ``id`` is used for the lookups.
        :param sql_stmt: the statement being run; only used in error messages.
        :return: ``None`` once ``Command.is_done`` reports the status as done.
        :raises AirflowException: if the command status is ``'error'``, if the
            runtime budget is exceeded, or if the retry limit is exhausted.
        """
        time.sleep(10)
        try:
            _command = Command.find(command.id)
        except Exception:
            # Retry the first lookup once after a longer pause. ``Exception`` is
            # caught instead of using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed by the retry.
            time.sleep(30)
            _command = Command.find(command.id)

        total_sleep_time = 0
        retries = 1000
        command_id = _command.id
        for _ in range(retries):
            if _command.status == 'error':
                raise AirflowException(
                    'Statement failed: https://api.qubole.com/v2/analyze?command_id=%s\n %s'
                    % (command_id, sql_stmt))
            elif Command.is_done(_command.status):
                return
            else:
                # The budget is checked before sleeping, so the 10 seconds about
                # to be spent are already counted.
                total_sleep_time += 10
                if total_sleep_time > self.expected_runtime * 1.5:
                    raise AirflowException(
                        "RS Total estimated runtime was exceeded, please adjust estimation in DAG if the process requires more time to complete query %s"
                        % sql_stmt)
                time.sleep(10)
                _command = Command.find(command_id)

        # Reached only when every retry was used without a terminal status.
        raise AirflowException(
            'RS_monitor_command call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s'
            % (sql_stmt, command.id))
Example #3
0
    def run_query(self, query, user):
        """Run ``query`` as a Qubole command and return its results as JSON.

        A Presto command is created when ``self.configuration['query_type']``
        is ``'presto'``, otherwise a Hive command. The command is polled every
        ``qbol.poll_interval`` seconds until ``Command.is_done`` is true.

        :param query: query text passed to the command.
        :param user: not used by this method.
        :return: ``(json_data, error)``. ``json_data`` is the serialized
            ``{'columns': ..., 'rows': ...}`` payload and ``error`` is ``None``;
            on ``KeyboardInterrupt`` the pair is
            ``(None, "Query cancelled by user.")``.
        """
        qbol.configure(api_token=self.configuration['token'],
                       api_url='%s/api' % self.configuration['endpoint'])

        # Bound before the try block so the interrupt handler can tell whether
        # a command was actually created (the interrupt may arrive earlier).
        cmd = None
        try:
            cls = PrestoCommand if (self.configuration['query_type']
                                    == 'presto') else HiveCommand
            cmd = cls.create(query=query, label=self.configuration['cluster'])
            logging.info("Qubole command created with Id: %s and Status: %s",
                         cmd.id, cmd.status)

            while not Command.is_done(cmd.status):
                time.sleep(qbol.poll_interval)
                cmd = Command.find(cmd.id)
                logging.info("Qubole command Id: %s and Status: %s", cmd.id,
                             cmd.status)

            rows = []
            columns = []
            error = None

            # NOTE(review): a command that ends with a status other than 'done'
            # yields an empty result with error=None -- confirm this is intended.
            if cmd.status == 'done':
                fp = StringIO()
                try:
                    cmd.get_results(fp=fp,
                                    inline=True,
                                    delim='\t',
                                    fetch=False,
                                    qlog=None,
                                    arguments=['true'])
                    results = fp.getvalue()
                finally:
                    # Release the buffer even if fetching the results fails.
                    fp.close()

                # The first line holds the tab-separated column names; every
                # following line is one tab-separated row.
                data = results.split('\r\n')
                columns = self.fetch_columns([
                    (i, TYPE_STRING) for i in data.pop(0).split('\t')
                ])
                rows = [
                    dict(zip((c['name'] for c in columns), row.split('\t')))
                    for row in data
                ]

            json_data = json_dumps({'columns': columns, 'rows': rows})
        except KeyboardInterrupt:
            if cmd is not None:
                logging.info('Sending KILL signal to Qubole Command Id: %s',
                             cmd.id)
                cmd.cancel()
            error = "Query cancelled by user."
            json_data = None

        return json_data, error
Example #4
0
    def execute(self, context):
        """Create the command, poll it until it is done and fail on a bad final status.

        The id of the created command is pushed to XCom under the key
        ``qbol_cmd_id``. When ``self.kwargs['fetch_logs']`` equals ``True`` the
        command log is written to the log after polling ends.

        :param context: task context; ``context['task_instance']`` must be present.
        :raises AirflowException: if the final command status is not ``'done'``.
        """
        args = self.cls.parse(self.args)
        self.cmd = self.cls.create(**args)
        context['task_instance'].xcom_push(key='qbol_cmd_id', value=self.cmd.id)
        logging.info("Qubole command created with Id: {0} and Status: {1}".format(str(self.cmd.id), self.cmd.status))

        # Re-fetch the command every poll interval until its status is reported as done.
        while not Command.is_done(self.cmd.status):
            time.sleep(Qubole.poll_interval)
            self.cmd = self.cls.find(self.cmd.id)
            logging.info("Command Id: {0} and Status: {1}".format(str(self.cmd.id), self.cmd.status))

        # dict.has_key() does not exist on Python 3; .get() works on both 2 and 3.
        # The equality (not identity) comparison is kept on purpose so callers
        # passing a value equal to True, such as 1, still get the logs.
        if self.kwargs.get('fetch_logs') == True:  # noqa: E712
            logging.info("Logs for Command Id: {0} \n{1}".format(str(self.cmd.id), self.cmd.get_log()))

        if self.cmd.status != 'done':
            raise AirflowException('Command Id: {0} failed with Status: {1}'.format(self.cmd.id, self.cmd.status))
Example #5
0
    def execute(self, context):
        """Create the command, poll it until it is done and fail on a bad final status.

        The id of the created command is pushed to XCom under the key
        ``qbol_cmd_id``. When ``self.kwargs['fetch_logs']`` is exactly ``True``
        the command log is written to the log after polling ends.

        :param context: task context; ``context['task_instance']`` must be present.
        :raises AirflowException: if the final command status is not ``'done'``.
        """
        parsed_args = self.cls.parse(self.create_cmd_args(context))
        self.cmd = self.cls.create(**parsed_args)

        task_instance = context['task_instance']
        task_instance.xcom_push(key='qbol_cmd_id', value=self.cmd.id)
        created_msg = "Qubole command created with Id: {0} and Status: {1}".format(
            str(self.cmd.id), self.cmd.status)
        logging.info(created_msg)

        # Re-fetch the command every poll interval until its status is reported as done.
        while True:
            if Command.is_done(self.cmd.status):
                break
            time.sleep(Qubole.poll_interval)
            self.cmd = self.cls.find(self.cmd.id)
            status_msg = "Command Id: {0} and Status: {1}".format(
                str(self.cmd.id), self.cmd.status)
            logging.info(status_msg)

        # Only the literal True enables log fetching; a missing key yields None here.
        if self.kwargs.get('fetch_logs') is True:
            logs_msg = "Logs for Command Id: {0} \n{1}".format(
                str(self.cmd.id), self.cmd.get_log())
            logging.info(logs_msg)

        if self.cmd.status != 'done':
            failure = 'Command Id: {0} failed with Status: {1}'.format(
                self.cmd.id, self.cmd.status)
            raise AirflowException(failure)
Example #6
0
    def execute(self, context):
        """Create the command, poll it until it is done and fail on a bad final status.

        The id of the created command is pushed to XCom under the key
        ``qbol_cmd_id``. When ``self.kwargs['fetch_logs']`` is exactly ``True``
        the command log is written to the log after polling ends.

        :param context: task context; ``context['task_instance']`` must be present.
        :raises AirflowException: if the final command status is not ``'done'``.
        """
        parsed_args = self.cls.parse(self.create_cmd_args(context))
        self.cmd = self.cls.create(**parsed_args)

        task_instance = context['task_instance']
        task_instance.xcom_push(key='qbol_cmd_id', value=self.cmd.id)
        _log.info(
            "Qubole command created with Id: %s and Status: %s",
            self.cmd.id,
            self.cmd.status,
        )

        # Re-fetch the command every poll interval until its status is reported as done.
        while True:
            if Command.is_done(self.cmd.status):
                break
            time.sleep(Qubole.poll_interval)
            self.cmd = self.cls.find(self.cmd.id)
            _log.info(
                "Command Id: %s and Status: %s",
                self.cmd.id,
                self.cmd.status,
            )

        # Only the literal True enables log fetching; a missing key yields None here.
        if self.kwargs.get('fetch_logs') is True:
            _log.info(
                "Logs for Command Id: %s \n%s",
                self.cmd.id,
                self.cmd.get_log(),
            )

        if self.cmd.status != 'done':
            failure = 'Command Id: {0} failed with Status: {1}'.format(
                self.cmd.id, self.cmd.status)
            raise AirflowException(failure)
Example #7
0
    def run_query(self, query, user):
        """Run ``query`` as a Qubole command and return its results as JSON.

        A Presto command is created when ``self.configuration['query_type']``
        is ``'presto'``, otherwise a Hive command. The command is polled every
        ``qbol.poll_interval`` seconds until ``Command.is_done`` is true.

        :param query: query text passed to the command.
        :param user: not used by this method.
        :return: ``(json_data, error)``. ``json_data`` is the serialized
            ``{'columns': ..., 'rows': ...}`` payload and ``error`` is ``None``;
            on ``KeyboardInterrupt`` the pair is
            ``(None, "Query cancelled by user.")``.
        """
        qbol.configure(api_token=self.configuration['token'],
                       api_url='%s/api' % self.configuration['endpoint'])

        # Bound before the try block so the interrupt handler can tell whether
        # a command was actually created (the interrupt may arrive earlier).
        cmd = None
        try:
            cls = PrestoCommand if(self.configuration['query_type'] == 'presto') else HiveCommand
            cmd = cls.create(query=query, label=self.configuration['cluster'])
            logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status)

            while not Command.is_done(cmd.status):
                time.sleep(qbol.poll_interval)
                cmd = Command.find(cmd.id)
                logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status)

            rows = []
            columns = []
            error = None

            # NOTE(review): a command that ends with a status other than 'done'
            # yields an empty result with error=None -- confirm this is intended.
            if cmd.status == 'done':
                fp = StringIO()
                try:
                    cmd.get_results(fp=fp, inline=True, delim='\t', fetch=False,
                                    qlog=None, arguments=['true'])
                    results = fp.getvalue()
                finally:
                    # Release the buffer even if fetching the results fails.
                    fp.close()

                # The first line holds the tab-separated column names; every
                # following line is one tab-separated row.
                data = results.split('\r\n')
                columns = self.fetch_columns([(i, TYPE_STRING) for i in data.pop(0).split('\t')])
                rows = [dict(zip((c['name'] for c in columns), row.split('\t'))) for row in data]

            json_data = json_dumps({'columns': columns, 'rows': rows})
        except KeyboardInterrupt:
            if cmd is not None:
                logging.info('Sending KILL signal to Qubole Command Id: %s', cmd.id)
                cmd.cancel()
            error = "Query cancelled by user."
            json_data = None

        return json_data, error
Example #8
0
    def run_query(self, query, user):
        """Run ``query`` as a Qubole command and return its results as JSON.

        The command type is chosen from the ``query_type`` configuration value
        (default ``"hive"``): ``"quantum"`` creates a ``SqlCommand``,
        ``"hive"`` a ``HiveCommand`` and ``"presto"`` a ``PrestoCommand``. The
        command is polled every ``qbol.poll_interval`` seconds until
        ``Command.is_done`` is true.

        :param query: query text passed to the command.
        :param user: not used by this method.
        :return: ``(json_data, error)``. ``json_data`` is the serialized
            ``{"columns": ..., "rows": ...}`` payload and ``error`` is ``None``;
            on ``KeyboardInterrupt`` the pair is
            ``(None, "Query cancelled by user.")``.
        :raises Exception: if ``query_type`` is not one of the supported values.
        """
        qbol.configure(
            api_token=self.configuration.get("token"),
            api_url="%s/api" % self.configuration.get("endpoint"),
        )

        # Bound before the try block so the interrupt handler can tell whether
        # a command was actually created (the interrupt may arrive earlier).
        cmd = None
        try:
            query_type = self.configuration.get("query_type", "hive")

            if query_type == "quantum":
                cmd = SqlCommand.create(query=query)
            elif query_type == "hive":
                cmd = HiveCommand.create(
                    query=query, label=self.configuration.get("cluster"))
            elif query_type == "presto":
                cmd = PrestoCommand.create(
                    query=query, label=self.configuration.get("cluster"))
            else:
                # Adjacent literals instead of a backslash continuation inside
                # the string, which embedded the source indentation in the
                # message.
                raise Exception("Invalid Query Type:%s. "
                                "It must be : hive / presto / quantum." %
                                query_type)

            logging.info("Qubole command created with Id: %s and Status: %s",
                         cmd.id, cmd.status)

            while not Command.is_done(cmd.status):
                time.sleep(qbol.poll_interval)
                cmd = Command.find(cmd.id)
                logging.info("Qubole command Id: %s and Status: %s", cmd.id,
                             cmd.status)

            rows = []
            columns = []
            error = None

            # NOTE(review): a command that ends with a status other than "done"
            # yields an empty result with error=None -- confirm this is intended.
            if cmd.status == "done":
                fp = StringIO()
                try:
                    cmd.get_results(
                        fp=fp,
                        inline=True,
                        delim="\t",
                        fetch=False,
                        qlog=None,
                        arguments=["true"],
                    )
                    results = fp.getvalue()
                finally:
                    # Release the buffer even if fetching the results fails.
                    fp.close()

                # The first line holds the tab-separated column names; every
                # following line is one tab-separated row.
                data = results.split("\r\n")
                columns = self.fetch_columns([
                    (i, TYPE_STRING) for i in data.pop(0).split("\t")
                ])
                rows = [
                    dict(
                        zip((column["name"] for column in columns),
                            row.split("\t"))) for row in data
                ]

            json_data = json_dumps({"columns": columns, "rows": rows})
        except KeyboardInterrupt:
            if cmd is not None:
                logging.info("Sending KILL signal to Qubole Command Id: %s",
                             cmd.id)
                cmd.cancel()
            error = "Query cancelled by user."
            json_data = None

        return json_data, error