def execute(self, context) -> None:
    """Create the Qubole command, wait for it to finish, and fail unless it ends as 'done'.

    The command arguments are built by ``self.create_cmd_args(context)`` and
    parsed by ``self.cls.parse``. The created command's id is pushed to XCom
    under the key ``qbol_cmd_id`` and the task instance is kept on
    ``self.task_instance``.

    :param context: task context mapping; must contain ``'task_instance'``.
    :raises AirflowException: if the final command status is not ``'done'``.
    """
    parsed_args = self.cls.parse(self.create_cmd_args(context))
    self.cmd = self.cls.create(**parsed_args)

    task_instance = context['task_instance']
    self.task_instance = task_instance
    task_instance.xcom_push(key='qbol_cmd_id', value=self.cmd.id)  # type: ignore[attr-defined]
    self.log.info(
        "Qubole command created with Id: %s and Status: %s",
        self.cmd.id,  # type: ignore[attr-defined]
        self.cmd.status,  # type: ignore[attr-defined]
    )

    # Poll until the command reports a finished status, re-fetching the
    # command object after every sleep.
    while True:
        if Command.is_done(self.cmd.status):  # type: ignore[attr-defined]
            break
        time.sleep(Qubole.poll_interval)
        self.cmd = self.cls.find(self.cmd.id)  # type: ignore[attr-defined]
        self.log.info(
            "Command Id: %s and Status: %s",
            self.cmd.id,  # type: ignore[attr-defined]
            self.cmd.status,  # type: ignore[attr-defined]
        )

    # Logs are fetched only when the flag is literally True.
    wants_logs = self.kwargs.get('fetch_logs') is True
    if wants_logs:
        self.log.info(
            "Logs for Command Id: %s \n%s",
            self.cmd.id,  # type: ignore[attr-defined]
            self.cmd.get_log(),  # type: ignore[attr-defined]
        )

    if self.cmd.status == 'done':  # type: ignore[attr-defined]
        return

    failure_message = 'Command Id: {} failed with Status: {}'.format(
        self.cmd.id, self.cmd.status  # type: ignore[attr-defined]
    )
    raise AirflowException(failure_message)
def monitor_command(self, command, sql_stmt):
    """Poll a Qubole command until it finishes, fails, or exceeds its time budget.

    The command is re-fetched every 10 seconds. Polling stops when the status
    is ``'error'`` (raises), when ``Command.is_done`` reports completion
    (returns ``None``), when the accumulated sleep time exceeds
    ``self.expected_runtime * 1.5`` (raises), or after 1000 polls (raises).

    :param command: command object exposing ``id``; it is refreshed via
        ``Command.find``.
    :param sql_stmt: the statement text, used only in error messages.
    :raises AirflowException: on command error, exceeded runtime budget, or
        exhausted polls.
    """
    poll_seconds = 10
    first_fetch_retry_seconds = 30
    max_polls = 1000

    time.sleep(poll_seconds)
    try:
        current = Command.find(command.id)
    except Exception:
        # Retry the first lookup once after a longer pause. Only ordinary
        # errors are retried: a bare ``except`` here would also swallow
        # KeyboardInterrupt/SystemExit and keep sleeping.
        time.sleep(first_fetch_retry_seconds)
        current = Command.find(command.id)

    command_id = current.id
    runtime_budget = self.expected_runtime * 1.5
    total_sleep_time = 0

    for _ in range(max_polls):
        if current.status == 'error':
            raise AirflowException(
                'Statement failed: https://api.qubole.com/v2/analyze?command_id=%s\n %s'
                % (command_id, sql_stmt))

        # NOTE(review): whether is_done() is also true for other terminal
        # states (e.g. a cancelled command) is not visible here - confirm
        # that returning silently is intended in that case.
        if Command.is_done(current.status):
            return

        # The budget is checked before sleeping, so the upcoming sleep
        # counts against it.
        total_sleep_time += poll_seconds
        if total_sleep_time > runtime_budget:
            raise AirflowException(
                "RS Total estimated runtime was exceeded, please adjust estimation in DAG if the process requires more time to complete query %s"
                % sql_stmt)

        time.sleep(poll_seconds)
        current = Command.find(command_id)

    raise AirflowException(
        'RS_monitor_command call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s'
        % (sql_stmt, command.id))
def run_query(self, query, user):
    """Run *query* on Qubole as a Presto or Hive command and return its result.

    The command class is ``PrestoCommand`` when the configured
    ``query_type`` is ``'presto'``, otherwise ``HiveCommand``. The command is
    polled every ``qbol.poll_interval`` until ``Command.is_done`` is true.
    For a ``'done'`` command the inline result is parsed as ``'\\r\\n'``
    separated rows of tab-separated values, with the first row as header.

    :param query: query text to submit.
    :param user: unused by this implementation.
    :returns: ``(json_data, error)``. ``json_data`` is the serialized
        ``{'columns': ..., 'rows': ...}`` payload, or ``None`` when the query
        was interrupted; ``error`` is ``None`` or a message string.
    """
    qbol.configure(api_token=self.configuration['token'],
                   api_url='%s/api' % self.configuration['endpoint'])

    # Bound before the try block so the interrupt handler can tell whether a
    # command exists; previously an interrupt during create() hit a NameError.
    cmd = None
    try:
        cls = PrestoCommand if (self.configuration['query_type'] == 'presto') else HiveCommand
        cmd = cls.create(query=query, label=self.configuration['cluster'])
        logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status)

        while not Command.is_done(cmd.status):
            time.sleep(qbol.poll_interval)
            cmd = Command.find(cmd.id)
            logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status)

        rows = []
        columns = []
        error = None

        # NOTE(review): a command that finishes in a non-'done' state yields
        # empty columns/rows and error=None - confirm this is intended.
        if cmd.status == 'done':
            fp = StringIO()
            try:
                cmd.get_results(fp=fp, inline=True, delim='\t', fetch=False,
                                qlog=None, arguments=['true'])
                results = fp.getvalue()
            finally:
                # Release the buffer even if fetching the results fails.
                fp.close()

            data = results.split('\r\n')
            # First row is the header; every column is typed as a string.
            columns = self.fetch_columns([
                (i, TYPE_STRING) for i in data.pop(0).split('\t')
            ])
            names = [c['name'] for c in columns]
            rows = [dict(zip(names, row.split('\t'))) for row in data]

        json_data = json_dumps({'columns': columns, 'rows': rows})
    except KeyboardInterrupt:
        if cmd is not None:
            logging.info('Sending KILL signal to Qubole Command Id: %s', cmd.id)
            cmd.cancel()
        error = "Query cancelled by user."
        json_data = None

    return json_data, error
def execute(self, context):
    """Create the Qubole command from ``self.args``, wait for it, and fail unless 'done'.

    The created command's id is pushed to XCom under the key
    ``qbol_cmd_id``. The command is re-fetched every
    ``Qubole.poll_interval`` until ``Command.is_done`` is true.

    :param context: task context mapping; must contain ``'task_instance'``.
    :raises AirflowException: if the final command status is not ``'done'``.
    """
    args = self.cls.parse(self.args)
    self.cmd = self.cls.create(**args)
    context['task_instance'].xcom_push(key='qbol_cmd_id', value=self.cmd.id)
    logging.info("Qubole command created with Id: %s and Status: %s",
                 self.cmd.id, self.cmd.status)

    while not Command.is_done(self.cmd.status):
        time.sleep(Qubole.poll_interval)
        self.cmd = self.cls.find(self.cmd.id)
        logging.info("Command Id: %s and Status: %s", self.cmd.id, self.cmd.status)

    # dict.has_key() no longer exists in Python 3; .get() gives the same
    # "present and equal to True" check. Equality rather than identity is
    # kept on purpose so existing callers passing a truthy 1 still get logs.
    if self.kwargs.get('fetch_logs') == True:  # noqa: E712
        logging.info("Logs for Command Id: %s \n%s", self.cmd.id, self.cmd.get_log())

    if self.cmd.status != 'done':
        raise AirflowException('Command Id: {0} failed with Status: {1}'.format(
            self.cmd.id, self.cmd.status))
def execute(self, context):
    """Submit the Qubole command, block until it finishes, and raise unless it is 'done'.

    Arguments come from ``self.create_cmd_args(context)`` parsed by
    ``self.cls.parse``. The command id is pushed to XCom as ``qbol_cmd_id``.

    :param context: task context mapping; must contain ``'task_instance'``.
    :raises AirflowException: if the final command status is not ``'done'``.
    """
    parsed_args = self.cls.parse(self.create_cmd_args(context))
    self.cmd = self.cls.create(**parsed_args)

    task_instance = context['task_instance']
    task_instance.xcom_push(key='qbol_cmd_id', value=self.cmd.id)
    created_message = "Qubole command created with Id: {0} and Status: {1}".format(
        str(self.cmd.id), self.cmd.status)
    logging.info(created_message)

    # Re-fetch the command after each sleep until it reports completion.
    while True:
        if Command.is_done(self.cmd.status):
            break
        time.sleep(Qubole.poll_interval)
        self.cmd = self.cls.find(self.cmd.id)
        progress_message = "Command Id: {0} and Status: {1}".format(
            str(self.cmd.id), self.cmd.status)
        logging.info(progress_message)

    # Logs are fetched only when the flag is literally True.
    if self.kwargs.get('fetch_logs') is True:
        log_message = "Logs for Command Id: {0} \n{1}".format(
            str(self.cmd.id), self.cmd.get_log())
        logging.info(log_message)

    if self.cmd.status == 'done':
        return

    raise AirflowException(
        'Command Id: {0} failed with Status: {1}'.format(self.cmd.id, self.cmd.status))
def execute(self, context):
    """Launch the Qubole command and wait for it; raise when it does not end as 'done'.

    The command is built from ``self.create_cmd_args(context)``; its id is
    published to XCom under ``qbol_cmd_id``. Progress is logged through the
    module logger ``_log``.

    :param context: task context mapping; must contain ``'task_instance'``.
    :raises AirflowException: if the final command status is not ``'done'``.
    """
    command_args = self.cls.parse(self.create_cmd_args(context))
    self.cmd = self.cls.create(**command_args)

    ti = context['task_instance']
    ti.xcom_push(key='qbol_cmd_id', value=self.cmd.id)
    _log.info(
        "Qubole command created with Id: %s and Status: %s",
        self.cmd.id,
        self.cmd.status,
    )

    # Keep refreshing the command until it reports a finished status.
    finished = Command.is_done(self.cmd.status)
    while not finished:
        time.sleep(Qubole.poll_interval)
        self.cmd = self.cls.find(self.cmd.id)
        _log.info("Command Id: %s and Status: %s", self.cmd.id, self.cmd.status)
        finished = Command.is_done(self.cmd.status)

    # Only a literal True enables log fetching.
    fetch_logs = self.kwargs.get('fetch_logs', False)
    if fetch_logs is True:
        _log.info("Logs for Command Id: %s \n%s", self.cmd.id, self.cmd.get_log())

    if self.cmd.status == 'done':
        return

    message = 'Command Id: {0} failed with Status: {1}'.format(
        self.cmd.id, self.cmd.status)
    raise AirflowException(message)
def run_query(self, query, user):
    """Execute *query* on Qubole (Presto or Hive) and return ``(json_data, error)``.

    ``PrestoCommand`` is used when the configured ``query_type`` is
    ``'presto'``; any other value uses ``HiveCommand``. The command is
    re-fetched every ``qbol.poll_interval`` until ``Command.is_done`` is
    true. When the status is ``'done'`` the inline result is split on
    ``'\\r\\n'`` into rows and on tabs into cells; the first row supplies
    the column names.

    :param query: query text to submit.
    :param user: unused by this implementation.
    :returns: ``(json_data, error)``; on user interruption ``json_data`` is
        ``None`` and ``error`` is ``"Query cancelled by user."``.
    """
    qbol.configure(api_token=self.configuration['token'],
                   api_url='%s/api' % self.configuration['endpoint'])

    # Initialised up front: an interrupt can arrive before create() returns,
    # and the handler below must not touch an unbound name.
    cmd = None
    try:
        cls = PrestoCommand if self.configuration['query_type'] == 'presto' else HiveCommand
        cmd = cls.create(query=query, label=self.configuration['cluster'])
        logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status)

        while not Command.is_done(cmd.status):
            time.sleep(qbol.poll_interval)
            cmd = Command.find(cmd.id)
            logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status)

        rows = []
        columns = []
        error = None

        # NOTE(review): non-'done' terminal states fall through with empty
        # results and no error - verify callers expect that.
        if cmd.status == 'done':
            fp = StringIO()
            try:
                cmd.get_results(fp=fp, inline=True, delim='\t', fetch=False,
                                qlog=None, arguments=['true'])
                results = fp.getvalue()
            finally:
                # Always close the buffer, including when get_results fails.
                fp.close()

            data = results.split('\r\n')
            # Header row -> column definitions, all typed as strings.
            columns = self.fetch_columns([(i, TYPE_STRING) for i in data.pop(0).split('\t')])
            column_names = [c['name'] for c in columns]
            rows = [dict(zip(column_names, row.split('\t'))) for row in data]

        json_data = json_dumps({'columns': columns, 'rows': rows})
    except KeyboardInterrupt:
        # Cancel remotely only if a command was actually created.
        if cmd is not None:
            logging.info('Sending KILL signal to Qubole Command Id: %s', cmd.id)
            cmd.cancel()
        error = "Query cancelled by user."
        json_data = None

    return json_data, error
def run_query(self, query, user):
    """Run *query* on Qubole as a quantum, Hive or Presto command.

    The command type is chosen from the configured ``query_type``
    (default ``"hive"``): ``"quantum"`` uses ``SqlCommand``, ``"hive"``
    uses ``HiveCommand`` and ``"presto"`` uses ``PrestoCommand``; the latter
    two are labelled with the configured ``cluster``. The command is
    re-fetched every ``qbol.poll_interval`` until ``Command.is_done`` is
    true. For a ``"done"`` command the inline result is split on
    ``"\\r\\n"`` into rows and on tabs into cells, the first row being the
    header.

    :param query: query text to submit.
    :param user: unused by this implementation.
    :returns: ``(json_data, error)``; on user interruption ``json_data`` is
        ``None`` and ``error`` is ``"Query cancelled by user."``.
    :raises Exception: if ``query_type`` is not hive / presto / quantum.
    """
    qbol.configure(
        api_token=self.configuration.get("token"),
        api_url="%s/api" % self.configuration.get("endpoint"),
    )

    # Bound before the try block so the interrupt handler never sees an
    # unbound name when the interrupt arrives during command creation.
    cmd = None
    try:
        query_type = self.configuration.get("query_type", "hive")

        if query_type == "quantum":
            cmd = SqlCommand.create(query=query)
        elif query_type == "hive":
            cmd = HiveCommand.create(
                query=query, label=self.configuration.get("cluster"))
        elif query_type == "presto":
            cmd = PrestoCommand.create(
                query=query, label=self.configuration.get("cluster"))
        else:
            # The message is a single clean line; the previous in-string
            # backslash continuation leaked indentation into the text.
            raise Exception(
                "Invalid Query Type:%s. It must be : hive / presto / quantum."
                % query_type)

        logging.info("Qubole command created with Id: %s and Status: %s",
                     cmd.id, cmd.status)

        while not Command.is_done(cmd.status):
            time.sleep(qbol.poll_interval)
            cmd = Command.find(cmd.id)
            logging.info("Qubole command Id: %s and Status: %s", cmd.id,
                         cmd.status)

        rows = []
        columns = []
        error = None

        # NOTE(review): a command ending in a non-"done" state returns empty
        # results with error=None - confirm this is the intended contract.
        if cmd.status == "done":
            fp = StringIO()
            try:
                cmd.get_results(
                    fp=fp,
                    inline=True,
                    delim="\t",
                    fetch=False,
                    qlog=None,
                    arguments=["true"],
                )
                results = fp.getvalue()
            finally:
                # Close the buffer even when fetching results raises.
                fp.close()

            data = results.split("\r\n")
            # Header row -> column definitions, all typed as strings.
            columns = self.fetch_columns([
                (i, TYPE_STRING) for i in data.pop(0).split("\t")
            ])
            column_names = [column["name"] for column in columns]
            rows = [dict(zip(column_names, row.split("\t"))) for row in data]

        json_data = json_dumps({"columns": columns, "rows": rows})
    except KeyboardInterrupt:
        # Cancel remotely only if a command was actually created.
        if cmd is not None:
            logging.info("Sending KILL signal to Qubole Command Id: %s", cmd.id)
            cmd.cancel()
        error = "Query cancelled by user."
        json_data = None

    return json_data, error