def get_columns(self, connection, table_name, schema=None, **kw):
    """
    Describe the columns of `table_name` as a list of dictionaries.

    Every row returned by `self._get_table_columns(connection, table_name, schema)`
    is converted into a dict with the keys 'name', 'type', 'nullable' and
    'default'. A Presto type name that has no entry in the local lookup table
    is logged and reported as `sql_types.NullType`.
    """
    # Extend types supported by PrestoDialect as defined in PyHive
    presto_to_sqla = {
        'bigint': sql_types.BigInteger,
        'integer': sql_types.Integer,
        'boolean': sql_types.Boolean,
        'double': sql_types.Float,
        'varchar': sql_types.String,
        'timestamp': sql_types.TIMESTAMP,
        'date': sql_types.DATE,
        'array<bigint>': sql_types.ARRAY(sql_types.Integer),
        'array<varchar>': sql_types.ARRAY(sql_types.String)
    }

    def describe(record):
        # Translate one row of table metadata into a column description.
        try:
            sqla_type = presto_to_sqla[record.Type]
        except KeyError:
            logger.warn("Did not recognize type '%s' of column '%s'" % (record.Type, record.Column))
            sqla_type = sql_types.NullType
        return {
            'name': record.Column,
            'type': sqla_type,
            # newer Presto no longer includes this column
            'nullable': getattr(record, 'Null', True),
            'default': None,
        }

    records = self._get_table_columns(connection, table_name, schema)
    return [describe(record) for record in records]
def _execute(self, statement, cursor, wait, session_properties):
    """
    Execute `statement` on Presto and return the cursor that ran it.

    If something goes wrong, `PrestoClient` will attempt to parse the error log
    and present the user with useful debugging information. If that fails, the
    full traceback will be raised instead.

    Args:
        statement: The SQL text handed to `cursor.execute`.
        cursor: A cursor to reuse. When falsy, a new `presto.Cursor` is built
            from this object's connection attributes.
        wait: When truthy, poll until the query reports the state "FINISHED",
            publishing progress through `logger.progress`.
        session_properties: Passed as `session_props` to a newly created cursor.

    Returns:
        The cursor on which the statement was executed.

    Raises:
        An exception of the same type as the `DatabaseError` that was caught,
        re-raised with the original traceback.
    """
    from pyhive import presto  # Imported here due to slow import performance in Python 3
    from pyhive.exc import DatabaseError  # Imported here due to slow import performance in Python 3
    try:
        cursor = cursor or presto.Cursor(
            host=self.host, port=self.port, username=self.username, password=self.password,
            catalog=self.catalog, schema=self.schema, session_props=session_properties,
            poll_interval=1, source=self.source, protocol=self.server_protocol
        )
        cursor.execute(statement)
        status = cursor.poll()
        if wait:
            logger.progress(0)
            # status None means command executed successfully
            # See https://github.com/dropbox/PyHive/blob/master/pyhive/presto.py#L234
            while status is not None and status['stats']['state'] != "FINISHED":
                stats = status['stats']
                if stats.get('totalSplits', 0) > 0:
                    pct_complete = round(stats['completedSplits'] / float(stats['totalSplits']), 4)
                    logger.progress(pct_complete * 100)
                status = cursor.poll()
            logger.progress(100, complete=True)
        return cursor
    except (DatabaseError, pandas.io.sql.DatabaseError) as e:
        # Attempt to parse database error, before ultimately reraising the same
        # exception, maintaining the full stacktrace.
        exc_type, exc_value, exc_tb = sys.exc_info()
        try:
            message = e.args[0]
            if isinstance(message, six.string_types):
                # The payload is the dict literal embedded in the error text.
                message = ast.literal_eval(re.match("[^{]*({.*})[^}]*$", message).group(1))

            linenumber = message['errorLocation']['lineNumber'] - 1
            splt = statement.splitlines()
            splt[linenumber] += ' <-- {errorType} ({errorName}) occurred. {message} '.format(**message)
            # Show the offending line with one line of context on either side.
            first = max(linenumber - 1, 0)
            last = min(linenumber + 2, len(splt))
            context = '\n\n[Error Context]\n{}\n'.format('\n'.join([splt[i] for i in range(first, last)]))

            class ErrContext(object):

                def __repr__(self):
                    return context

            # logged twice so that both notebook and console users see the error context
            exc_value.args = [exc_value, ErrContext()]
            logger.error(context)
        except Exception:
            # Parsing is best effort only; `Exception` (rather than a bare
            # `except`) lets KeyboardInterrupt and SystemExit propagate.
            logger.warn(("Omniduct was unable to parse the database error messages. Refer to the "
                         "traceback below for full error details."))

        if isinstance(exc_type, type):
            exc_type = exc_type(exc_value)
        raise_with_traceback(exc_type, exc_tb)
def _execute(self, statement, query=True, cursor=None, wait=False):
    """
    Execute `statement` on Presto and return the cursor that ran it.

    On a database error, an attempt is made to parse the error payload and log
    the offending statement line with its neighbours. If that parsing fails, a
    warning is logged instead. The error is re-raised in either case.

    Args:
        statement: The SQL text handed to `cursor.execute`.
        query: When truthy (the default), poll for completion exactly as
            `wait` does.
        cursor: A cursor to reuse. When falsy, one is obtained from
            `self.__presto.cursor()`.
        wait: When truthy, poll until the query reports the state "FINISHED",
            publishing progress through `logger.progress`.

    Returns:
        The cursor on which the statement was executed.

    Raises:
        An exception of the same type as the `DatabaseError` that was caught,
        re-raised with the original traceback.
    """
    from pyhive.exc import DatabaseError  # Imported here due to slow import performance in Python 3
    try:
        cursor = cursor or self.__presto.cursor()
        cursor.execute(statement)
        status = cursor.poll()
        if wait or query:
            logger.progress(0)
            # `cursor.poll()` can return None once the command has completed,
            # so guard before indexing into the status payload.
            while status is not None and status['stats']['state'] != "FINISHED":
                stats = status['stats']
                if stats.get('totalSplits', 0) > 0:
                    pct_complete = round(stats['completedSplits'] / float(stats['totalSplits']), 4)
                    logger.progress(pct_complete * 100)
                status = cursor.poll()
            logger.progress(100, complete=True)
        return cursor
    except (DatabaseError, pandas.io.sql.DatabaseError) as e:
        # Attempt to parse database error, before ultimately reraising the same
        # exception, maintaining the full stacktrace.
        exc_type, exc_value, exc_tb = sys.exc_info()
        try:
            message = e.args[0]
            if isinstance(message, str):
                # Match against the extracted text: exceptions have no
                # `.message` attribute on Python 3.
                message = ast.literal_eval(re.match("[^{]*({.*})[^}]*$", message).group(1))

            linenumber = message['errorLocation']['lineNumber'] - 1
            splt = statement.splitlines()
            splt[linenumber] += ' <-- {errorType} ({errorName}) occurred. {message} '.format(**message)
            # Show the offending line with one line of context on either side.
            first = max(linenumber - 1, 0)
            last = min(linenumber + 2, len(splt))
            context = '\n\n[Error Context]\n{}\n'.format('\n'.join([splt[i] for i in range(first, last)]))

            class ErrContext(object):

                def __repr__(self):
                    return context

            # logged twice so that both notebook and console users see the error context
            exc_value.args = [exc_value, ErrContext()]
            logger.error(context)
        except Exception:
            # Parsing is best effort only; `Exception` (rather than a bare
            # `except`) lets KeyboardInterrupt and SystemExit propagate.
            logger.warn(("Omniduct was unable to parse the database error messages. Refer to the "
                         "traceback below for full error details."))

        if isinstance(exc_type, type):
            exc_type = exc_type(exc_value)
        raise_with_traceback(exc_type, exc_tb)
def __setattr__(self, key, value):
    """
    Assign `value` to attribute `key`, disconnecting first when required.

    A disconnect happens only when all of the following hold: the
    `_Duct__prepared` attribute is truthy, `connection_fields` is non-empty,
    `key` is one of those fields, and `is_connected()` returns a truthy value.
    In that case a warning is logged, `disconnect()` is called and the
    `__prepared` flag is reset to False. The attribute is assigned afterwards
    in every case.
    """
    try:
        needs_reset = (
            getattr(self, '_Duct__prepared', False)
            and getattr(self, 'connection_fields', None)
            and key in self.connection_fields
            and self.is_connected()
        )
        if needs_reset:
            logger.warn('Disconnecting prior to changing field that connection is based on: {}.'.format(key))
            self.disconnect()
            self.__prepared = False
    except AttributeError:
        # A failed attribute lookup during the check must not block the
        # assignment itself.
        pass
    object.__setattr__(self, key, value)
def _execute(self, statement, cursor, wait, session_properties):
    """
    Run `statement` against Presto and hand back the cursor used.

    If something goes wrong, `PrestoClient` will attempt to parse the error log
    and present the user with useful debugging information. If that fails, the
    full traceback will be raised instead.

    Args:
        statement: The SQL text handed to `cursor.execute`.
        cursor: A cursor to reuse. When falsy, a new `presto.Cursor` is built
            from this object's connection attributes.
        wait: When truthy, poll until the query reports the state "FINISHED",
            publishing progress through `logger.progress`.
        session_properties: Passed as `session_props` to a newly created cursor.

    Returns:
        The cursor on which the statement was executed.

    Raises:
        An exception of the same type as the `DatabaseError` that was caught,
        re-raised with the original traceback.
    """
    from pyhive import presto  # Imported here due to slow import performance in Python 3
    from pyhive.exc import DatabaseError  # Imported here due to slow import performance in Python 3
    try:
        cursor = cursor or presto.Cursor(
            host=self.host,
            port=self.port,
            username=self.username,
            password=self.password,
            catalog=self.catalog,
            schema=self.schema,
            session_props=session_properties,
            poll_interval=1,
            source=self.source,
            protocol=self.server_protocol,
        )
        cursor.execute(statement)
        status = cursor.poll()
        if wait:
            logger.progress(0)
            # status None means command executed successfully
            # See https://github.com/dropbox/PyHive/blob/master/pyhive/presto.py#L234
            while status is not None and status['stats']['state'] != "FINISHED":
                stats = status['stats']
                if stats.get('totalSplits', 0) > 0:
                    pct_complete = round(stats['completedSplits'] / float(stats['totalSplits']), 4)
                    logger.progress(pct_complete * 100)
                status = cursor.poll()
            logger.progress(100, complete=True)
        return cursor
    except (DatabaseError, pandas.io.sql.DatabaseError) as e:
        # Attempt to parse database error, before ultimately reraising the same
        # exception, maintaining the full stacktrace.
        exc_type, exc_value, exc_tb = sys.exc_info()
        try:
            message = e.args[0]
            if isinstance(message, six.string_types):
                # The payload is the dict literal embedded in the error text.
                message = ast.literal_eval(re.match("[^{]*({.*})[^}]*$", message).group(1))

            linenumber = message['errorLocation']['lineNumber'] - 1
            splt = statement.splitlines()
            splt[linenumber] += ' <-- {errorType} ({errorName}) occurred. {message} '.format(**message)
            # Show the offending line with one line of context on either side.
            first = max(linenumber - 1, 0)
            last = min(linenumber + 2, len(splt))
            context = '\n\n[Error Context]\n{}\n'.format('\n'.join([splt[i] for i in range(first, last)]))

            class ErrContext(object):

                def __repr__(self):
                    return context

            # logged twice so that both notebook and console users see the error context
            exc_value.args = [exc_value, ErrContext()]
            logger.error(context)
        except Exception:
            # Parsing is best effort only; `Exception` (rather than a bare
            # `except`) lets KeyboardInterrupt and SystemExit propagate.
            logger.warn(("Omniduct was unable to parse the database error messages. Refer to the "
                         "traceback below for full error details."))

        if isinstance(exc_type, type):
            exc_type = exc_type(exc_value)
        raise_with_traceback(exc_type, exc_tb)