Beispiel #1
0
    def monitor_command(self, command, sql_stmt):
        """Poll a Qubole command until it is done, fails, or takes too long.

        Sleeps 10 seconds, refreshes the command through ``Command.find``
        (retrying once after a further 30 seconds if that first lookup
        raises), then re-checks the status every 10 seconds.

        :param command: object exposing the ``id`` of the command to watch.
        :param sql_stmt: statement text; only used in the error messages.
        :raises AirflowException: if the command status is ``'error'``, if the
            accumulated polling sleep exceeds 1.5 times
            ``self.expected_runtime``, or if the command is still not done
            once every polling attempt has been used.
        """
        _command = command
        time.sleep(10)
        try:
            _command = Command.find(_command.id)
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must abort right away instead of being answered
            # with a 30 second sleep and a second lookup.
            time.sleep(30)
            _command = Command.find(_command.id)

        # Only the sleeps performed inside the loop are counted here; the
        # initial waits above are not part of the runtime budget.
        total_sleep_time = 0
        retries = 1000
        command_id = _command.id
        for _ in range(retries):
            if _command.status == 'error':
                raise AirflowException(
                    'Statement failed: https://api.qubole.com/v2/analyze?command_id=%s\n %s'
                    % (command_id, sql_stmt))
            if Command.is_done(_command.status):
                return

            # The budget is checked before sleeping, so the sleep that would
            # cross the limit is never performed.
            total_sleep_time += 10
            if total_sleep_time > self.expected_runtime * 1.5:
                raise AirflowException(
                    "RS Total estimated runtime was exceeded, please adjust estimation in DAG if the process requires more time to complete query %s"
                    % sql_stmt)
            time.sleep(10)
            _command = Command.find(command_id)

        # All polling attempts were used without reaching a final status.
        raise AirflowException(
            'RS_monitor_command call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s'
            % (sql_stmt, command.id))
Beispiel #2
0
def qubole_by_id_raw(api_token, hcid, filename):
    """Dump the results of an existing Qubole command into ``<filename>.csv``.

    :param api_token: token handed to ``Qubole.configure``.
    :param hcid: id of the command looked up with ``Command.find``.
    :param filename: output path without the ``.csv`` suffix.
    :return: the path of the file that was written.
    """
    Qubole.configure(api_token=api_token)
    found_cmd = Command.find(hcid)

    csv_path = filename + '.csv'
    # The file is opened in binary mode; ``get_results`` writes into it.
    with open(csv_path, 'wb') as sink:
        found_cmd.get_results(sink)
    return csv_path
Beispiel #3
0
    def handle_failure_retry(context):
        """Cancel the Qubole command stored for this task if it is running.

        The command id is pulled from XCom under the key ``qbol_cmd_id`` for
        the task instance found in ``context['ti']``. Nothing happens when no
        id is stored, when ``Command.find`` returns ``None``, or when the
        command status is anything other than ``'running'``.

        :param context: mapping that holds the task instance under ``'ti'``.
        """
        task_instance = context['ti']
        command_id = task_instance.xcom_pull(
            key='qbol_cmd_id', task_ids=task_instance.task_id)
        if command_id is None:
            return

        qubole_cmd = Command.find(command_id)
        if qubole_cmd is None or qubole_cmd.status != 'running':
            return

        log.info('Cancelling the Qubole Command Id: %s', command_id)
        qubole_cmd.cancel()
Beispiel #4
0
def qubole_by_id(api_token, hcid, filename):
    """Fetch the results of an existing Qubole command as a DataFrame.

    The results are first written to ``<filename>.csv`` and that file is then
    parsed as tab-delimited text.

    :param api_token: token handed to ``Qubole.configure``.
    :param hcid: id of the command looked up with ``Command.find``.
    :param filename: output path without the ``.csv`` suffix.
    :return: the DataFrame produced by ``pd.read_csv``.
    """
    Qubole.configure(api_token=api_token)
    found_cmd = Command.find(hcid)

    csv_path = filename + '.csv'
    with open(csv_path, 'wb') as sink:
        found_cmd.get_results(sink)

    # Despite the ``.csv`` suffix the file is read with a tab delimiter.
    return pd.read_csv(csv_path, delimiter='\t')
    def handle_failure_retry(context):
        """Cancel the Qubole command stored for this task if it is running.

        The command id is pulled from XCom under the key ``qbol_cmd_id`` for
        the task instance found in ``context['ti']``. Nothing happens when no
        id is stored, when ``Command.find`` returns ``None``, or when the
        command status is anything other than ``'running'``.

        :param context: mapping that holds the task instance under ``'ti'``.
        """
        task_instance = context['ti']
        command_id = task_instance.xcom_pull(
            key='qbol_cmd_id', task_ids=task_instance.task_id)
        if command_id is None:
            return

        qubole_cmd = Command.find(command_id)
        if qubole_cmd is None or qubole_cmd.status != 'running':
            return

        # The logger is only created once a cancellation is actually due.
        log = LoggingMixin().log
        log.info('Cancelling the Qubole Command Id: %s', command_id)
        qubole_cmd.cancel()
Beispiel #6
0
    def run_query(self, query, user):
        """Run ``query`` on Qubole and return a ``(json_data, error)`` pair.

        A Presto command is created when the configured ``query_type`` is
        ``'presto'``, otherwise a Hive command. The command is polled every
        ``qbol.poll_interval`` until ``Command.is_done`` accepts its status.
        When the final status is ``'done'`` the tab-separated result text is
        turned into columns (all typed ``TYPE_STRING``) and row dicts; for any
        other final status the column and row lists stay empty.

        :param query: query text sent to Qubole.
        :param user: not used by this method.
        :return: ``(json_data, None)`` normally, or
            ``(None, "Query cancelled by user.")`` after a KeyboardInterrupt.
        """
        qbol.configure(api_token=self.configuration['token'],
                       api_url='%s/api' % self.configuration['endpoint'])

        # Bound before the ``try`` so the interrupt handler can tell whether
        # a command exists yet; an interrupt during creation used to end in
        # an UnboundLocalError on ``cmd``.
        cmd = None
        error = None

        try:
            cls = PrestoCommand if (self.configuration['query_type']
                                    == 'presto') else HiveCommand
            cmd = cls.create(query=query, label=self.configuration['cluster'])
            logging.info("Qubole command created with Id: %s and Status: %s",
                         cmd.id, cmd.status)

            while not Command.is_done(cmd.status):
                time.sleep(qbol.poll_interval)
                cmd = Command.find(cmd.id)
                logging.info("Qubole command Id: %s and Status: %s", cmd.id,
                             cmd.status)

            rows = []
            columns = []

            if cmd.status == 'done':
                fp = StringIO()
                cmd.get_results(fp=fp,
                                inline=True,
                                delim='\t',
                                fetch=False,
                                qlog=None,
                                arguments=['true'])

                results = fp.getvalue()
                fp.close()

                # The first line carries the column names, the rest are rows.
                # NOTE(review): if the payload ends with '\r\n' the last split
                # element is '' and becomes a row of its own - confirm intent.
                data = results.split('\r\n')
                columns = self.fetch_columns([
                    (i, TYPE_STRING) for i in data.pop(0).split('\t')
                ])
                rows = [
                    dict(zip((c['name'] for c in columns), row.split('\t')))
                    for row in data
                ]

            json_data = json_dumps({'columns': columns, 'rows': rows})
        except KeyboardInterrupt:
            # Only cancel when the command was actually created.
            if cmd is not None:
                logging.info('Sending KILL signal to Qubole Command Id: %s',
                             cmd.id)
                cmd.cancel()
            error = "Query cancelled by user."
            json_data = None

        return json_data, error
Beispiel #7
0
    def handle_failure_retry(context) -> None:
        """Reconcile the task instance with its Qubole command after a failure.

        The command id is pulled from XCom under the key ``qbol_cmd_id``. If
        the command status is ``'done'`` the task instance state is set to
        ``State.SUCCESS``; if it is ``'running'`` the command is cancelled.
        Nothing happens when no id is stored, when ``Command.find`` returns
        ``None``, or for any other status.

        :param context: mapping that holds the task instance under ``'ti'``.
        """
        task_instance = context['ti']
        command_id = task_instance.xcom_pull(
            key='qbol_cmd_id', task_ids=task_instance.task_id)
        if command_id is None:
            return

        qubole_cmd = Command.find(command_id)
        if qubole_cmd is None:
            return

        status = qubole_cmd.status
        if status == 'done':
            log.info(
                'Command ID: %s has been succeeded, hence marking this '
                'TI as Success.',
                command_id,
            )
            task_instance.state = State.SUCCESS
        elif status == 'running':
            log.info('Cancelling the Qubole Command Id: %s', command_id)
            qubole_cmd.cancel()
Beispiel #8
0
    def handle_failure_retry(context):
        """Reconcile the task instance with its Qubole command after a failure.

        The command id is pulled from XCom under the key ``qbol_cmd_id``. If
        the command status is ``'done'`` the task instance state is set to
        ``State.SUCCESS``; if it is ``'running'`` the command is cancelled.
        Nothing happens when no id is stored, when ``Command.find`` returns
        ``None``, or for any other status.

        :param context: mapping that holds the task instance under ``'ti'``.
        """
        task_instance = context['ti']
        command_id = task_instance.xcom_pull(
            key='qbol_cmd_id', task_ids=task_instance.task_id)
        if command_id is None:
            return

        qubole_cmd = Command.find(command_id)
        if qubole_cmd is None:
            return

        # The logger is created only after a command object was found.
        log = LoggingMixin().log
        status = qubole_cmd.status
        if status == 'done':
            log.info(
                'Command ID: %s has been succeeded, hence marking this TI as Success.',
                command_id,
            )
            task_instance.state = State.SUCCESS
        elif status == 'running':
            log.info('Cancelling the Qubole Command Id: %s', command_id)
            qubole_cmd.cancel()
    def handle_failure_retry(context):
        """Reconcile the task instance with its Qubole command after a failure.

        The command id is pulled from XCom under the key ``qbol_cmd_id``. If
        the command status is ``'done'`` the task instance state is set to
        ``State.SUCCESS``; if it is ``'running'`` the command is cancelled.
        Nothing happens when no id is stored, when ``Command.find`` returns
        ``None``, or for any other status.

        :param context: mapping that holds the task instance under ``'ti'``.
        """
        task_instance = context['ti']
        command_id = task_instance.xcom_pull(
            key='qbol_cmd_id', task_ids=task_instance.task_id)
        if command_id is None:
            return

        # Child logger "QuboleHook" of the 'airflow' logger, obtained before
        # the command lookup.
        hook_logger = logging.getLogger('airflow').getChild("QuboleHook")
        qubole_cmd = Command.find(command_id)
        if qubole_cmd is None:
            return

        status = qubole_cmd.status
        if status == 'done':
            hook_logger.info(
                'Command ID: %s has been succeeded, hence marking this TI as Success.',
                command_id,
            )
            task_instance.state = State.SUCCESS
        elif status == 'running':
            hook_logger.info('Cancelling the Qubole Command Id: %s', command_id)
            qubole_cmd.cancel()
Beispiel #10
0
def qubole(api_token, sql, replacements, filename):
    """Run the query stored in a file on Qubole and return it as a DataFrame.

    The query text is read from ``sql``, passed through
    ``find_replace_multi`` with ``replacements``, and run as a Hive command
    under the label ``'Trading-spark'``. The results are written to
    ``<filename>.csv`` and then parsed as tab-delimited text.

    :param api_token: token handed to ``Qubole.configure``.
    :param sql: path of the file holding the query text.
    :param replacements: second argument for ``find_replace_multi``.
    :param filename: output path without the ``.csv`` suffix.
    :return: the DataFrame produced by ``pd.read_csv``.
    """
    Qubole.configure(api_token=api_token)

    with open(sql, 'r') as sql_file:
        raw_query = sql_file.read()
    final_query = find_replace_multi(raw_query, replacements)

    hive_cmd = HiveCommand.run(query=final_query, label='Trading-spark')
    # The command is looked up again by id before its results are fetched.
    fetched_cmd = Command.find(hive_cmd.id)

    csv_path = filename + '.csv'
    with open(csv_path, 'wb') as sink:
        fetched_cmd.get_results(sink)

    # Despite the ``.csv`` suffix the file is read with a tab delimiter.
    return pd.read_csv(csv_path, delimiter='\t')
Beispiel #11
0
    def run_query(self, query, user):
        """Run ``query`` on Qubole and return a ``(json_data, error)`` pair.

        A Presto command is created when the configured ``query_type`` is
        ``'presto'``, otherwise a Hive command. The command is polled every
        ``qbol.poll_interval`` until ``Command.is_done`` accepts its status.
        When the final status is ``'done'`` the tab-separated result text is
        turned into columns (all typed ``TYPE_STRING``) and row dicts; for any
        other final status the column and row lists stay empty.

        :param query: query text sent to Qubole.
        :param user: not used by this method.
        :return: ``(json_data, None)`` normally, or
            ``(None, "Query cancelled by user.")`` after a KeyboardInterrupt.
        """
        qbol.configure(api_token=self.configuration['token'],
                       api_url='%s/api' % self.configuration['endpoint'])

        # Bound before the ``try`` so the interrupt handler can tell whether
        # a command exists yet; an interrupt during creation used to end in
        # an UnboundLocalError on ``cmd``.
        cmd = None
        error = None

        try:
            cls = PrestoCommand if(self.configuration['query_type'] == 'presto') else HiveCommand
            cmd = cls.create(query=query, label=self.configuration['cluster'])
            logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status)

            while not Command.is_done(cmd.status):
                time.sleep(qbol.poll_interval)
                cmd = Command.find(cmd.id)
                logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status)

            rows = []
            columns = []

            if cmd.status == 'done':
                fp = StringIO()
                cmd.get_results(fp=fp, inline=True, delim='\t', fetch=False,
                                qlog=None, arguments=['true'])

                results = fp.getvalue()
                fp.close()

                # The first line carries the column names, the rest are rows.
                # NOTE(review): if the payload ends with '\r\n' the last split
                # element is '' and becomes a row of its own - confirm intent.
                data = results.split('\r\n')
                columns = self.fetch_columns([(i, TYPE_STRING) for i in data.pop(0).split('\t')])
                rows = [dict(zip((c['name'] for c in columns), row.split('\t'))) for row in data]

            json_data = json_dumps({'columns': columns, 'rows': rows})
        except KeyboardInterrupt:
            # Only cancel when the command was actually created.
            if cmd is not None:
                logging.info('Sending KILL signal to Qubole Command Id: %s', cmd.id)
                cmd.cancel()
            error = "Query cancelled by user."
            json_data = None

        return json_data, error
Beispiel #12
0
    def run_query(self, query, user):
        """Run ``query`` on Qubole and return a ``(json_data, error)`` pair.

        The configured ``query_type`` (default ``"hive"``) selects the command
        class: ``"quantum"`` creates a ``SqlCommand`` without a label, while
        ``"hive"`` and ``"presto"`` create their command with the configured
        ``cluster`` as label. The command is polled every
        ``qbol.poll_interval`` until ``Command.is_done`` accepts its status.
        When the final status is ``"done"`` the tab-separated result text is
        turned into columns (all typed ``TYPE_STRING``) and row dicts; for any
        other final status the column and row lists stay empty.

        :param query: query text sent to Qubole.
        :param user: not used by this method.
        :return: ``(json_data, None)`` normally, or
            ``(None, "Query cancelled by user.")`` after a KeyboardInterrupt.
        :raises Exception: if ``query_type`` is not hive, presto or quantum.
        """
        qbol.configure(
            api_token=self.configuration.get("token"),
            api_url="%s/api" % self.configuration.get("endpoint"),
        )

        # Bound before the ``try`` so the interrupt handler can tell whether
        # a command exists yet; an interrupt during creation used to end in
        # an UnboundLocalError on ``cmd``.
        cmd = None
        error = None

        try:
            query_type = self.configuration.get("query_type", "hive")

            if query_type == "quantum":
                cmd = SqlCommand.create(query=query)
            elif query_type == "hive":
                cmd = HiveCommand.create(
                    query=query, label=self.configuration.get("cluster"))
            elif query_type == "presto":
                cmd = PrestoCommand.create(
                    query=query, label=self.configuration.get("cluster"))
            else:
                # Adjacent literals instead of a backslash continuation
                # inside the string, which leaked the source indentation
                # into the message.
                raise Exception(
                    "Invalid Query Type:%s. "
                    "It must be : hive / presto / quantum." % query_type)

            logging.info("Qubole command created with Id: %s and Status: %s",
                         cmd.id, cmd.status)

            while not Command.is_done(cmd.status):
                time.sleep(qbol.poll_interval)
                cmd = Command.find(cmd.id)
                logging.info("Qubole command Id: %s and Status: %s", cmd.id,
                             cmd.status)

            rows = []
            columns = []

            if cmd.status == "done":
                fp = StringIO()
                cmd.get_results(
                    fp=fp,
                    inline=True,
                    delim="\t",
                    fetch=False,
                    qlog=None,
                    arguments=["true"],
                )

                results = fp.getvalue()
                fp.close()

                # The first line carries the column names, the rest are rows.
                # NOTE(review): if the payload ends with "\r\n" the last split
                # element is "" and becomes a row of its own - confirm intent.
                data = results.split("\r\n")
                columns = self.fetch_columns([
                    (i, TYPE_STRING) for i in data.pop(0).split("\t")
                ])
                rows = [
                    dict(
                        zip((column["name"] for column in columns),
                            row.split("\t"))) for row in data
                ]

            json_data = json_dumps({"columns": columns, "rows": rows})
        except KeyboardInterrupt:
            # Only cancel when the command was actually created.
            if cmd is not None:
                logging.info("Sending KILL signal to Qubole Command Id: %s",
                             cmd.id)
                cmd.cancel()
            error = "Query cancelled by user."
            json_data = None

        return json_data, error