Example #1
    def monitor_command(self, command, sql_stmt):
        _command = command
        time.sleep(10)
        try:
            _command = Command.find(_command.id)
        except Exception:  # transient lookup failure; wait and retry once
            time.sleep(30)
            _command = Command.find(_command.id)

        total_sleep_time = 0
        retries = 1000
        command_id = _command.id
        for i in range(retries):
            if _command.status == 'error':
                raise AirflowException(
                    'Statement failed: https://api.qubole.com/v2/analyze?command_id=%s\n %s'
                    % (command_id, sql_stmt))
            elif Command.is_done(_command.status):
                return
            else:
                total_sleep_time += 10
                if total_sleep_time > self.expected_runtime * 1.5:
                    raise AirflowException(
                        "RS Total estimated runtime was exceeded, please adjust estimation in DAG if the process requires more time to complete query %s"
                        % sql_stmt)
                time.sleep(10)
                _command = Command.find(command_id)

        raise AirflowException(
            'RS_monitor_command call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s'
            % (sql_stmt, command.id))
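For orientation, monitor_command expects a qds_sdk command object that has already been submitted. A minimal, hypothetical pairing is sketched below (the query text and the surrounding operator instance `op` are placeholders; Example #6 shows the same pattern in full context):

from qds_sdk.commands import HiveCommand

stmt = "SELECT COUNT(*) FROM events;"            # placeholder query
command = HiveCommand.run(query=stmt, label='default')
op.monitor_command(command, stmt)                # polls until done, raises on error or exceeded runtime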
Example #2
    def get_log(self, ti):
        """
        Get logs of a command from Qubole
        :param ti: Task Instance of the dag, used to determine the Qubole command id
        :return: command log as text
        """
        if self.cmd is None:
            cmd_id = ti.xcom_pull(key="qbol_cmd_id", task_ids=self.task_id)
        else:
            cmd_id = self.cmd.id  # fall back to the command created by this hook
        Command.get_log_id(self.cls, cmd_id)
Example #3
    def get_jobs_id(self, ti):
        """
        Get jobs associated with a Qubole command
        :param ti: Task Instance of the dag, used to determine the Qubole command id
        :return: Job information associated with the command
        """
        if self.cmd is None:
            cmd_id = ti.xcom_pull(key="qbol_cmd_id", task_ids=self.task_id)
        else:
            cmd_id = self.cmd.id  # fall back to the command created by this hook
        Command.get_jobs_id(self.cls, cmd_id)
Example #4
    def run_query(self, query, user):
        qbol.configure(api_token=self.configuration['token'],
                       api_url='%s/api' % self.configuration['endpoint'])

        try:
            cls = PrestoCommand if (self.configuration['query_type']
                                    == 'presto') else HiveCommand
            cmd = cls.create(query=query, label=self.configuration['cluster'])
            logging.info("Qubole command created with Id: %s and Status: %s",
                         cmd.id, cmd.status)

            while not Command.is_done(cmd.status):
                time.sleep(qbol.poll_interval)
                cmd = Command.find(cmd.id)
                logging.info("Qubole command Id: %s and Status: %s", cmd.id,
                             cmd.status)

            rows = []
            columns = []
            error = None

            if cmd.status == 'done':
                fp = StringIO()
                cmd.get_results(fp=fp,
                                inline=True,
                                delim='\t',
                                fetch=False,
                                qlog=None,
                                arguments=['true'])

                results = fp.getvalue()
                fp.close()

                data = results.split('\r\n')
                columns = self.fetch_columns([
                    (i, TYPE_STRING) for i in data.pop(0).split('\t')
                ])
                rows = [
                    dict(zip((c['name'] for c in columns), row.split('\t')))
                    for row in data
                ]

            json_data = json_dumps({'columns': columns, 'rows': rows})
        except KeyboardInterrupt:
            logging.info('Sending KILL signal to Qubole Command Id: %s',
                         cmd.id)
            cmd.cancel()
            error = "Query cancelled by user."
            json_data = None

        return json_data, error
Example #5
    def execute(self, context) -> None:
        """Execute call"""
        args = self.cls.parse(self.create_cmd_args(context))
        self.cmd = self.cls.create(**args)
        self.task_instance = context['task_instance']
        context['task_instance'].xcom_push(key='qbol_cmd_id', value=self.cmd.id)  # type: ignore[attr-defined]
        self.log.info(
            "Qubole command created with Id: %s and Status: %s",
            self.cmd.id,  # type: ignore[attr-defined]
            self.cmd.status,  # type: ignore[attr-defined]
        )

        while not Command.is_done(self.cmd.status):  # type: ignore[attr-defined]
            time.sleep(Qubole.poll_interval)
            self.cmd = self.cls.find(self.cmd.id)  # type: ignore[attr-defined]
            self.log.info(
                "Command Id: %s and Status: %s", self.cmd.id, self.cmd.status  # type: ignore[attr-defined]
            )

        if 'fetch_logs' in self.kwargs and self.kwargs['fetch_logs'] is True:
            self.log.info(
                "Logs for Command Id: %s \n%s", self.cmd.id, self.cmd.get_log()  # type: ignore[attr-defined]
            )

        if self.cmd.status != 'done':  # type: ignore[attr-defined]
            raise AirflowException(
                'Command Id: {} failed with Status: {}'.format(
                    self.cmd.id, self.cmd.status  # type: ignore[attr-defined]
                )
            )
Example #6
    def drop_temp_hive_table(self):
        command = None
        create_table_stmt = None
        try:
            create_table_stmt = "DROP TABLE IF EXISTS %s_airflow_temp;" % self.table_name

            command = HiveCommand.run(query=create_table_stmt, label='default')
            self.monitor_command(command, create_table_stmt)
        except Exception as e:
            if command is None:
                raise AirflowException(
                    'drop_temp_hive_table call for %s failed. No command Id available.\n%s'
                    % (create_table_stmt, e))
            else:
                raise AirflowException(
                    'drop_temp_hive_table call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s\n%s'
                    % (create_table_stmt, command.id, e))

        try:
            stmt = "s3cmd -c /usr/lib/hustler/s3cfg rm -rf s3://%s/result_cache/%s_airflow_temp/;" % \
                   (self.s3_bucket, self.table_name)

            command = Command.run(command_type='ShellCommand',
                                  inline=stmt,
                                  label='default')
            self.monitor_command(command, stmt)
        except Exception as e:
            if command is None:
                raise AirflowException(
                    'drop_temp_hive_table call for %s failed. No command Id available.\n%s'
                    % (stmt, e))
            else:
                raise AirflowException(
                    'drop_temp_hive_table call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s\n%s'
                    % (stmt, command.id, e))
Example #7
def done_qubole(query_id):
    """Sends query_id to Qubole and retrieves
    the data as pandas DataFrame.

    :param int query_id: query_id ready in Qubole
    :return:  pandas DataFrame with response data.
    :rtype: pandas.DataFrame
    """
    with execute_with_handling_errors(config.get_value, 'qubole',
                                      'api_token') as api_token:
        if api_token is None:
            return pd.DataFrame([])

    Qubole.configure(api_token=api_token)

    with execute_with_handling_errors(Command().find, id=query_id) as res:
        if res is None:
            return pd.DataFrame([])

    print("Id: %s, Status: %s" % (str(res.id), res.status))

    try:
        response_buffer = io.BytesIO()
        res.get_results(response_buffer)
        return qubole_output_to_df(response_buffer.getvalue())

    except Exception as e:
        print(e)
        print("Oops!  There was a problem.  Try again...")
        return pd.DataFrame([])
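A hedged usage sketch for the helper above; the command id 123456 is a placeholder and done_qubole is assumed to be importable from the module that defines it:

df = done_qubole(123456)          # placeholder Qubole command id
if df.empty:
    print("No results (missing api_token, unknown id, or fetch error)")
else:
    print(df.head())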
Example #8
def qubole_by_id_raw(api_token, hcid, filename):
    Qubole.configure(api_token=api_token)
    cmd = Command.find(hcid)
    out_file = filename + '.csv'
    with open(out_file, 'wb') as writer:
        cmd.get_results(writer)

    return out_file
Example #9
    def handle_failure_retry(context):
        ti = context['ti']
        cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

        if cmd_id is not None:
            cmd = Command.find(cmd_id)
            if cmd is not None:
                if cmd.status == 'running':
                    log.info('Cancelling the Qubole Command Id: %s', cmd_id)
                    cmd.cancel()
Example #10
def qubole_by_id(api_token, hcid, filename):
    Qubole.configure(api_token=api_token)
    cmd = Command.find(hcid)
    out_file = filename + '.csv'
    with open(out_file, 'wb') as writer:
        cmd.get_results(writer)

    df = pd.read_csv(out_file, delimiter='\t')

    return df
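As a rough illustration, the function above might be called like this; the API token, command id, and filename are placeholders:

df = qubole_by_id(api_token="YOUR_QDS_API_TOKEN", hcid=123456, filename="results")
print(df.shape)   # the fetched CSV also remains on disk as results.csv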
Example #11
    def handle_failure_retry(context):
        ti = context['ti']
        cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

        if cmd_id is not None:
            cmd = Command.find(cmd_id)
            if cmd is not None:
                if cmd.status == 'running':
                    log = LoggingMixin().log
                    log.info('Cancelling the Qubole Command Id: %s', cmd_id)
                    cmd.cancel()
Example #12
def get(query,
        delete_file=True,
        filepath='',
        delimiter=';',
        query_type='presto',
        cluster_label=None):

    with execute_with_handling_errors(config.get_value, 'qubole',
                                      'api_token') as api_token:
        if api_token is None:
            return

    try:
        Qubole.configure(api_token=api_token)
    except UnauthorizedAccess:
        print("Invalid credentials were provided")
        return

    if isinstance(query, int):
        with execute_with_handling_errors(Command().find, id=query) as command:
            if command is None:
                return
    elif query_type == 'presto':
        with execute_with_handling_errors(PrestoCommand.run,
                                          query=query,
                                          label=cluster_label) as command:
            if command is None:
                return
    elif query_type == 'hive':
        with execute_with_handling_errors(HiveCommand.run,
                                          query=query,
                                          label=cluster_label) as command:
            if command is None:
                return
    else:
        print('Please verify your input.')
        return

    if filepath != '':
        file = open(filepath, 'w+')
    else:
        file = tempfile.NamedTemporaryFile(mode='w+', delete=delete_file)

    if command.status == 'done':
        _get_results(command, file, delimiter)
        file.seek(0)

        return file
    else:
        raise Exception(
            'Could not retrieve query results (id: %s, status: %s)' %
            (command.id, command.status))
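A brief, hypothetical call to the helper above; the query, cluster label, and delimiter are placeholders, and the return value is an open file handle positioned at the start of the results:

result_file = get("SELECT 1", query_type='presto', cluster_label='default', delimiter='\t')
if result_file is not None:
    print(result_file.read())     # raw delimited text written by _get_results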
Example #13
    def run_query(self, query, user):
        qbol.configure(api_token=self.configuration['token'],
                       api_url='%s/api' % self.configuration['endpoint'])

        try:
            cls = PrestoCommand if(self.configuration['query_type'] == 'presto') else HiveCommand
            cmd = cls.create(query=query, label=self.configuration['cluster'])
            logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status)

            while not Command.is_done(cmd.status):
                time.sleep(qbol.poll_interval)
                cmd = Command.find(cmd.id)
                logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status)

            rows = []
            columns = []
            error = None

            if cmd.status == 'done':
                fp = StringIO()
                cmd.get_results(fp=fp, inline=True, delim='\t', fetch=False,
                                qlog=None, arguments=['true'])

                results = fp.getvalue()
                fp.close()

                data = results.split('\r\n')
                columns = self.fetch_columns([(i, TYPE_STRING) for i in data.pop(0).split('\t')])
                rows = [dict(zip((c['name'] for c in columns), row.split('\t'))) for row in data]

            json_data = json_dumps({'columns': columns, 'rows': rows})
        except KeyboardInterrupt:
            logging.info('Sending KILL signal to Qubole Command Id: %s', cmd.id)
            cmd.cancel()
            error = "Query cancelled by user."
            json_data = None

        return json_data, error
Example #14
    def handle_failure_retry(context) -> None:
        """Handle retries in case of failures"""
        ti = context['ti']
        cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

        if cmd_id is not None:
            cmd = Command.find(cmd_id)
            if cmd is not None:
                if cmd.status == 'done':
                    log.info('Command ID: %s has been succeeded, hence marking this TI as Success.', cmd_id)
                    ti.state = State.SUCCESS
                elif cmd.status == 'running':
                    log.info('Cancelling the Qubole Command Id: %s', cmd_id)
                    cmd.cancel()
Example #15
    def handle_failure_retry(context):
        ti = context['ti']
        cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

        if cmd_id is not None:
            cmd = Command.find(cmd_id)
            if cmd is not None:
                log = LoggingMixin().log
                if cmd.status == 'done':
                    log.info('Command ID: %s has been succeeded, hence marking this '
                                'TI as Success.', cmd_id)
                    ti.state = State.SUCCESS
                elif cmd.status == 'running':
                    log.info('Cancelling the Qubole Command Id: %s', cmd_id)
                    cmd.cancel()
Example #16
    def check_for_result(migration_number, command):
        """
        A utility method to check the return status of a migration and fail the migration in case of a failure in the
        query.
        :param migration_number: The migration number that you were trying to run.
        :type migration_number: int
        :param command: The instance type of the migration command.
        :type command: Command
        :return: None
        :raise RuntimeError:
        """
        if not Command.is_success(command.status):
            error_message = "Encountered failure while trying to run migration with id {0}. QDS command id that " \
                            "failed was {1} ".format(migration_number, command.id)
            logging.error(error_message)
            raise RuntimeError(error_message)
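One way this check might be wired up, purely as a sketch; the migration number and query below are invented for illustration:

from qds_sdk.commands import HiveCommand

cmd = HiveCommand.run(query="ALTER TABLE events ADD COLUMNS (source STRING);", label='default')
check_for_result(42, cmd)   # raises RuntimeError unless the command succeeded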
Example #17
    def execute(self, context):
        args = self.cls.parse(self.create_cmd_args(context))
        self.cmd = self.cls.create(**args)
        context['task_instance'].xcom_push(key='qbol_cmd_id', value=self.cmd.id)
        logging.info("Qubole command created with Id: {0} and Status: {1}".format(str(self.cmd.id), self.cmd.status))

        while not Command.is_done(self.cmd.status):
            time.sleep(Qubole.poll_interval)
            self.cmd = self.cls.find(self.cmd.id)
            logging.info("Command Id: {0} and Status: {1}".format(str(self.cmd.id), self.cmd.status))

        if 'fetch_logs' in self.kwargs and self.kwargs['fetch_logs'] is True:
            logging.info("Logs for Command Id: {0} \n{1}".format(str(self.cmd.id), self.cmd.get_log()))

        if self.cmd.status != 'done':
            raise AirflowException('Command Id: {0} failed with Status: {1}'.format(self.cmd.id, self.cmd.status))
Example #18
    def execute(self, context):
        args = self.cls.parse(self.args)
        self.cmd = self.cls.create(**args)
        context['task_instance'].xcom_push(key='qbol_cmd_id', value=self.cmd.id)
        logging.info("Qubole command created with Id: {0} and Status: {1}".format(str(self.cmd.id), self.cmd.status))

        while not Command.is_done(self.cmd.status):
            time.sleep(Qubole.poll_interval)
            self.cmd = self.cls.find(self.cmd.id)
            logging.info("Command Id: {0} and Status: {1}".format(str(self.cmd.id), self.cmd.status))

        if 'fetch_logs' in self.kwargs and self.kwargs['fetch_logs'] is True:
            logging.info("Logs for Command Id: {0} \n{1}".format(str(self.cmd.id), self.cmd.get_log()))

        if self.cmd.status != 'done':
            raise AirflowException('Command Id: {0} failed with Status: {1}'.format(self.cmd.id, self.cmd.status))
Example #19
    def handle_failure_retry(context):
        ti = context['ti']
        cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

        if cmd_id is not None:
            logger = logging.getLogger('airflow').getChild("QuboleHook")
            cmd = Command.find(cmd_id)
            if cmd is not None:
                if cmd.status == 'done':
                    logger.info(
                        'Command ID: %s has been succeeded, hence marking this '
                        'TI as Success.', cmd_id)
                    ti.state = State.SUCCESS
                elif cmd.status == 'running':
                    logger.info('Cancelling the Qubole Command Id: %s', cmd_id)
                    cmd.cancel()
Example #20
def qubole(api_token, sql, replacements, filename):
    Qubole.configure(api_token=api_token)
    with open(sql, 'r') as f:
        query = f.read()

    label = 'Trading-spark'
    query = find_replace_multi(query, replacements)
    hc = HiveCommand.run(query=query, label=label)
    cmd = Command.find(hc.id)
    out_file = filename + '.csv'

    with open(out_file, 'wb') as writer:
        cmd.get_results(writer)

    df = pd.read_csv(out_file, delimiter='\t')

    return df
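A hypothetical invocation of the function above; the SQL file, placeholder keys, and output name are examples only, and find_replace_multi is assumed to substitute each key in the query text:

replacements = {'{{start_date}}': '2021-01-01', '{{end_date}}': '2021-01-31'}
df = qubole(api_token="YOUR_QDS_API_TOKEN", sql="daily_report.sql",
            replacements=replacements, filename="daily_report")
print(len(df))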
Example #21
    def execute(self, context):
        args = self.cls.parse(self.create_cmd_args(context))
        self.cmd = self.cls.create(**args)
        context['task_instance'].xcom_push(key='qbol_cmd_id', value=self.cmd.id)
        _log.info("Qubole command created with Id: %s and Status: %s",
                     self.cmd.id, self.cmd.status)

        while not Command.is_done(self.cmd.status):
            time.sleep(Qubole.poll_interval)
            self.cmd = self.cls.find(self.cmd.id)
            _log.info("Command Id: %s and Status: %s", self.cmd.id, self.cmd.status)

        if 'fetch_logs' in self.kwargs and self.kwargs['fetch_logs'] is True:
            _log.info("Logs for Command Id: %s \n%s", self.cmd.id, self.cmd.get_log())

        if self.cmd.status != 'done':
            raise AirflowException('Command Id: {0} failed with Status: {1}'.format(
                                   self.cmd.id, self.cmd.status))
Example #22
    def kill_command(self, qubole_jid):
        """Kills a qubole job with the given job_id."""
        self._configure_qubole()
        qubole_jid = int(qubole_jid)
        Command.cancel_id(qubole_jid)
Example #23
    def run_query(self, query, user):
        qbol.configure(
            api_token=self.configuration.get("token"),
            api_url="%s/api" % self.configuration.get("endpoint"),
        )

        try:
            query_type = self.configuration.get("query_type", "hive")

            if query_type == "quantum":
                cmd = SqlCommand.create(query=query)
            elif query_type == "hive":
                cmd = HiveCommand.create(
                    query=query, label=self.configuration.get("cluster"))
            elif query_type == "presto":
                cmd = PrestoCommand.create(
                    query=query, label=self.configuration.get("cluster"))
            else:
                raise Exception("Invalid Query Type:%s.\
                        It must be : hive / presto / quantum." %
                                self.configuration.get("query_type"))

            logging.info("Qubole command created with Id: %s and Status: %s",
                         cmd.id, cmd.status)

            while not Command.is_done(cmd.status):
                time.sleep(qbol.poll_interval)
                cmd = Command.find(cmd.id)
                logging.info("Qubole command Id: %s and Status: %s", cmd.id,
                             cmd.status)

            rows = []
            columns = []
            error = None

            if cmd.status == "done":
                fp = StringIO()
                cmd.get_results(
                    fp=fp,
                    inline=True,
                    delim="\t",
                    fetch=False,
                    qlog=None,
                    arguments=["true"],
                )

                results = fp.getvalue()
                fp.close()

                data = results.split("\r\n")
                columns = self.fetch_columns([
                    (i, TYPE_STRING) for i in data.pop(0).split("\t")
                ])
                rows = [
                    dict(
                        zip((column["name"] for column in columns),
                            row.split("\t"))) for row in data
                ]

            json_data = json_dumps({"columns": columns, "rows": rows})
        except KeyboardInterrupt:
            logging.info("Sending KILL signal to Qubole Command Id: %s",
                         cmd.id)
            cmd.cancel()
            error = "Query cancelled by user."
            json_data = None

        return json_data, error