def handle_cursor( # pylint: disable=too-many-locals cls, cursor: Any, query: Query, session: Session ) -> None: """Updates progress information""" from pyhive import hive # pylint: disable=no-name-in-module unfinished_states = ( hive.ttypes.TOperationState.INITIALIZED_STATE, hive.ttypes.TOperationState.RUNNING_STATE, ) polled = cursor.poll() last_log_line = 0 tracking_url = None job_id = None query_id = query.id while polled.operationState in unfinished_states: query = session.query(type(query)).filter_by(id=query_id).one() if query.status == QueryStatus.STOPPED: cursor.cancel() break log = cursor.fetch_logs() or "" if log: log_lines = log.splitlines() progress = cls.progress(log_lines) logger.info(f"Query {query_id}: Progress total: {progress}") needs_commit = False if progress > query.progress: query.progress = progress needs_commit = True if not tracking_url: tracking_url = cls.get_tracking_url(log_lines) if tracking_url: job_id = tracking_url.split("/")[-2] logger.info( f"Query {query_id}: Found the tracking url: {tracking_url}" ) tracking_url = tracking_url_trans(tracking_url) logger.info( f"Query {query_id}: Transformation applied: {tracking_url}" ) query.tracking_url = tracking_url logger.info(f"Query {query_id}: Job id: {job_id}") needs_commit = True if job_id and len(log_lines) > last_log_line: # Wait for job id before logging things out # this allows for prefixing all log lines and becoming # searchable in something like Kibana for l in log_lines[last_log_line:]: logger.info(f"Query {query_id}: [{job_id}] {l}") last_log_line = len(log_lines) if needs_commit: session.commit() time.sleep(hive_poll_interval) polled = cursor.poll()
def handle_cursor(cls, cursor: Any, query: Query, session: Session) -> None: """Updates progress information""" query_id = query.id poll_interval = query.database.connect_args.get( "poll_interval", current_app.config["PRESTO_POLL_INTERVAL"] ) logger.info("Query %i: Polling the cursor for progress", query_id) polled = cursor.poll() # poll returns dict -- JSON status information or ``None`` # if the query is done # https://github.com/dropbox/PyHive/blob/ # b34bdbf51378b3979eaf5eca9e956f06ddc36ca0/pyhive/presto.py#L178 while polled: # Update the object and wait for the kill signal. stats = polled.get("stats", {}) query = session.query(type(query)).filter_by(id=query_id).one() if query.status in [QueryStatus.STOPPED, QueryStatus.TIMED_OUT]: cursor.cancel() break if stats: state = stats.get("state") # if already finished, then stop polling if state == "FINISHED": break completed_splits = float(stats.get("completedSplits")) total_splits = float(stats.get("totalSplits")) if total_splits and completed_splits: progress = 100 * (completed_splits / total_splits) logger.info( "Query {} progress: {} / {} " # pylint: disable=logging-format-interpolation "splits".format(query_id, completed_splits, total_splits) ) if progress > query.progress: query.progress = progress session.commit() time.sleep(poll_interval) logger.info("Query %i: Polling the cursor for progress", query_id) polled = cursor.poll()
def handle_cursor( # pylint: disable=too-many-locals cls, cursor: Any, query: Query, session: Session) -> None: """Updates progress information""" # pylint: disable=import-outside-toplevel from pyhive import hive unfinished_states = ( hive.ttypes.TOperationState.INITIALIZED_STATE, hive.ttypes.TOperationState.RUNNING_STATE, ) polled = cursor.poll() last_log_line = 0 tracking_url = None job_id = None query_id = query.id while polled.operationState in unfinished_states: # Queries don't terminate when user clicks the STOP button on SQL LAB. # Refresh session so that the `query.status` modified in stop_query in # views/core.py is reflected here. session.refresh(query) query = session.query(type(query)).filter_by(id=query_id).one() if query.status == QueryStatus.STOPPED: cursor.cancel() break try: log = cursor.fetch_logs() or "" except Exception: # pylint: disable=broad-except logger.warning("Call to GetLog() failed") log = "" if log: log_lines = log.splitlines() progress = cls.progress(log_lines) logger.info("Query %s: Progress total: %s", str(query_id), str(progress)) needs_commit = False if progress > query.progress: query.progress = progress needs_commit = True if not tracking_url: tracking_url = cls.get_tracking_url(log_lines) if tracking_url: job_id = tracking_url.split("/")[-2] logger.info( "Query %s: Found the tracking url: %s", str(query_id), tracking_url, ) transformer = current_app.config[ "TRACKING_URL_TRANSFORMER"] tracking_url = transformer(tracking_url) logger.info( "Query %s: Transformation applied: %s", str(query_id), tracking_url, ) query.tracking_url = tracking_url logger.info("Query %s: Job id: %s", str(query_id), str(job_id)) needs_commit = True if job_id and len(log_lines) > last_log_line: # Wait for job id before logging things out # this allows for prefixing all log lines and becoming # searchable in something like Kibana for l in log_lines[last_log_line:]: logger.info("Query %s: [%s] %s", str(query_id), str(job_id), l) last_log_line = len(log_lines) if needs_commit: session.commit() time.sleep(current_app.config["HIVE_POLL_INTERVAL"]) polled = cursor.poll()
def handle_cursor( # pylint: disable=too-many-locals cls, cursor: Any, query: Query, session: Session) -> None: """Updates progress information""" from pyhive import hive unfinished_states = ( hive.ttypes.TOperationState.INITIALIZED_STATE, hive.ttypes.TOperationState.RUNNING_STATE, ) polled = cursor.poll() last_log_line = 0 tracking_url = None job_id = None query_id = query.id while polled.operationState in unfinished_states: query = session.query(type(query)).filter_by(id=query_id).one() if query.status == QueryStatus.STOPPED: cursor.cancel() break log = cursor.fetch_logs() or "" if log: log_lines = log.splitlines() progress = cls.progress(log_lines) logger.info("Query %s: Progress total: %s", str(query_id), str(progress)) needs_commit = False if progress > query.progress: query.progress = progress needs_commit = True if not tracking_url: tracking_url = cls.get_tracking_url(log_lines) if tracking_url: job_id = tracking_url.split("/")[-2] logger.info( "Query %s: Found the tracking url: %s", str(query_id), tracking_url, ) tracking_url = current_app.config[ "TRACKING_URL_TRANSFORMER"] logger.info( "Query %s: Transformation applied: %s", str(query_id), tracking_url, ) query.tracking_url = tracking_url logger.info("Query %s: Job id: %s", str(query_id), str(job_id)) needs_commit = True if job_id and len(log_lines) > last_log_line: # Wait for job id before logging things out # this allows for prefixing all log lines and becoming # searchable in something like Kibana for l in log_lines[last_log_line:]: logger.info("Query %s: [%s] %s", str(query_id), str(job_id), l) last_log_line = len(log_lines) if needs_commit: session.commit() time.sleep(current_app.config["HIVE_POLL_INTERVAL"]) polled = cursor.poll()