コード例 #1
0
ファイル: _schemas.py プロジェクト: naoyak/omniduct
    def get_columns(self, connection, table_name, schema=None, **kw):
        """
        Describe the columns of a table as a list of dictionaries.

        Extends the types supported by PrestoDialect as defined in PyHive.

        Parameters:
            connection: Connection object forwarded to
                `self._get_table_columns`.
            table_name: Name of the table whose columns are described.
            schema: Optional schema name, forwarded unchanged.
            **kw: Accepted for interface compatibility; not used here.

        Returns:
            list of dict: One dictionary per row returned by
            `self._get_table_columns`, with the keys 'name', 'type',
            'nullable' and 'default'. Columns whose type cannot be mapped are
            logged and reported as `sql_types.NullType`.
        """
        type_map = {
            'bigint': sql_types.BigInteger,
            'integer': sql_types.Integer,
            'boolean': sql_types.Boolean,
            'double': sql_types.Float,
            'varchar': sql_types.String,
            'timestamp': sql_types.TIMESTAMP,
            'date': sql_types.DATE,
            'array<bigint>': sql_types.ARRAY(sql_types.Integer),
            'array<varchar>': sql_types.ARRAY(sql_types.String),
        }

        rows = self._get_table_columns(connection, table_name, schema)
        result = []
        for row in rows:
            type_name = row.Type
            coltype = type_map.get(type_name)
            if coltype is None and hasattr(type_name, 'partition'):
                # Parameterised types (e.g. 'varchar(255)') do not match the
                # map exactly; retry with the base name before the '('.
                coltype = type_map.get(type_name.partition('(')[0].strip())
            if coltype is None:
                logger.warn("Did not recognize type '%s' of column '%s'" %
                            (row.Type, row.Column))
                coltype = sql_types.NullType
            result.append({
                'name': row.Column,
                'type': coltype,
                # newer Presto no longer includes this column
                'nullable': getattr(row, 'Null', True),
                'default': None,
            })
        return result
コード例 #2
0
ファイル: _schemas.py プロジェクト: djKooks/omniduct
    def get_columns(self, connection, table_name, schema=None, **kw):
        """
        Return column metadata for `table_name`.

        Extends the types supported by PrestoDialect as defined in PyHive.

        Parameters:
            connection: Connection object passed on to
                `self._get_table_columns`.
            table_name: Name of the table to inspect.
            schema: Optional schema name, passed on unchanged.
            **kw: Accepted for interface compatibility; not used here.

        Returns:
            list of dict: For every row from `self._get_table_columns`, a
            dictionary with 'name', 'type', 'nullable' and 'default' keys.
            A type that cannot be mapped triggers a warning and is reported
            as `sql_types.NullType`.
        """
        type_map = {
            'bigint': sql_types.BigInteger,
            'integer': sql_types.Integer,
            'boolean': sql_types.Boolean,
            'double': sql_types.Float,
            'varchar': sql_types.String,
            'timestamp': sql_types.TIMESTAMP,
            'date': sql_types.DATE,
            'array<bigint>': sql_types.ARRAY(sql_types.Integer),
            'array<varchar>': sql_types.ARRAY(sql_types.String),
        }

        rows = self._get_table_columns(connection, table_name, schema)
        result = []
        for row in rows:
            declared = row.Type
            coltype = type_map.get(declared)
            if coltype is None and hasattr(declared, 'partition'):
                # 'varchar(255)' and similar parameterised spellings are not
                # literal keys of the map, so fall back to the base type name.
                base_name = declared.partition('(')[0].strip()
                coltype = type_map.get(base_name)
            if coltype is None:
                logger.warn("Did not recognize type '%s' of column '%s'" % (row.Type, row.Column))
                coltype = sql_types.NullType
            result.append({
                'name': row.Column,
                'type': coltype,
                # newer Presto no longer includes this column
                'nullable': getattr(row, 'Null', True),
                'default': None,
            })
        return result
コード例 #3
0
ファイル: presto.py プロジェクト: djKooks/omniduct
    def _execute(self, statement, cursor, wait, session_properties):
        """
        Execute `statement` and return the cursor it was executed on.

        If something goes wrong, `PrestoClient` will attempt to parse the error
        log and present the user with useful debugging information. If that fails,
        the full traceback will be raised instead.

        Parameters:
            statement (str): The statement to execute.
            cursor: An existing cursor to reuse. When falsy, a new
                `pyhive.presto.Cursor` is created from this object's
                connection attributes.
            wait (bool): When truthy, poll the cursor until it reports the
                state "FINISHED" (or returns no status), reporting progress
                through `logger.progress`.
            session_properties: Passed to a newly created cursor as
                `session_props`.

        Returns:
            The cursor on which `statement` was executed.

        Raises:
            An exception of the same type as the caught database error, built
            from the original exception and re-raised through
            `raise_with_traceback` with the original traceback.
        """
        from pyhive import presto  # Imported here due to slow import performance in Python 3
        from pyhive.exc import DatabaseError  # Imported here due to slow import performance in Python 3
        try:
            cursor = cursor or presto.Cursor(
                host=self.host, port=self.port, username=self.username, password=self.password,
                catalog=self.catalog, schema=self.schema, session_props=session_properties,
                poll_interval=1, source=self.source, protocol=self.server_protocol
            )
            cursor.execute(statement)
            status = cursor.poll()
            if wait:
                logger.progress(0)
                # status None means command executed successfully
                # See https://github.com/dropbox/PyHive/blob/master/pyhive/presto.py#L234
                while status is not None and status['stats']['state'] != "FINISHED":
                    if status['stats'].get('totalSplits', 0) > 0:
                        pct_complete = round(status['stats']['completedSplits'] / float(status['stats']['totalSplits']), 4)
                        logger.progress(pct_complete * 100)
                    status = cursor.poll()
                logger.progress(100, complete=True)
            return cursor
        except (DatabaseError, pandas.io.sql.DatabaseError) as e:
            # Attempt to parse database error, before ultimately reraising the same
            # exception, maintaining the full stacktrace.
            exc_type, exc_value, exc_traceback = sys.exc_info()

            try:
                message = e.args[0]
                if isinstance(message, six.string_types):
                    # The error payload is a dict literal embedded in the text.
                    message = ast.literal_eval(re.match(r"[^{]*({.*})[^}]*$", message).group(1))

                linenumber = message['errorLocation']['lineNumber'] - 1
                splt = statement.splitlines()
                splt[linenumber] += '   <--  {errorType} ({errorName}) occurred. {message} '.format(**message)
                # Show the offending line with one line of context either side.
                context = '\n\n[Error Context]\n{}\n'.format('\n'.join([
                    splt[i] for i in range(max(linenumber - 1, 0), min(linenumber + 2, len(splt)))
                ]))

                class ErrContext(object):

                    def __repr__(self):
                        return context

                # logged twice so that both notebook and console users see the error context
                exc_value.args = [exc_value, ErrContext()]
                logger.error(context)
            except Exception:
                # Parsing is best-effort only; `Exception` (rather than a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                logger.warn(("Omniduct was unable to parse the database error messages. Refer to the "
                             "traceback below for full error details."))

            if isinstance(exc_type, type):
                exc_type = exc_type(exc_value)

            raise_with_traceback(exc_type, exc_traceback)
コード例 #4
0
    def _execute(self, statement, query=True, cursor=None, wait=False):
        """
        Execute `statement` and return the cursor it was executed on.

        On a database error, an attempt is made to parse the error payload and
        log the offending line of `statement` with surrounding context before
        the error is re-raised.

        Parameters:
            statement (str): The statement to execute.
            query (bool): When truthy, poll until the statement has finished
                (same effect as `wait`).
            cursor: An existing cursor to reuse. When falsy, a new one is
                obtained from `self.__presto.cursor()`.
            wait (bool): When truthy, poll until the statement has finished,
                reporting progress through `logger.progress`.

        Returns:
            The cursor on which `statement` was executed.

        Raises:
            An exception of the same type as the caught database error, built
            from the original exception and re-raised through
            `raise_with_traceback` with the original traceback.
        """
        from pyhive.exc import DatabaseError  # Imported here due to slow import performance in Python 3
        try:
            cursor = cursor or self.__presto.cursor()
            cursor.execute(statement)
            status = cursor.poll()
            if wait or query:
                logger.progress(0)
                # A status of None carries no 'stats' to inspect, so stop
                # polling instead of failing on the subscript.
                while status is not None and status['stats']['state'] != "FINISHED":
                    if status['stats'].get('totalSplits', 0) > 0:
                        pct_complete = round(
                            status['stats']['completedSplits'] /
                            float(status['stats']['totalSplits']), 4)
                        logger.progress(pct_complete * 100)
                    status = cursor.poll()
                logger.progress(100, complete=True)
            return cursor
        except (DatabaseError, pandas.io.sql.DatabaseError) as e:
            # Attempt to parse database error, before ultimately reraising the same
            # exception, maintaining the full stacktrace.
            exc_type, exc_value, exc_traceback = sys.exc_info()

            try:
                message = e.args[0]
                if isinstance(message, str):
                    # Parse the text already fetched from `e.args`;
                    # `e.message` does not exist on Python 3 exceptions.
                    message = ast.literal_eval(
                        re.match(r"[^{]*({.*})[^}]*$", message).group(1))

                linenumber = message['errorLocation']['lineNumber'] - 1
                splt = statement.splitlines()
                splt[linenumber] += (
                    '   <--  {errorType} ({errorName}) occurred. {message} '.format(**message))
                # Show the offending line with one line of context either side.
                context = '\n\n[Error Context]\n{}\n'.format('\n'.join([
                    splt[i]
                    for i in range(max(linenumber - 1, 0),
                                   min(linenumber + 2, len(splt)))
                ]))

                class ErrContext(object):
                    def __repr__(self):
                        return context

                # logged twice so that both notebook and console users see the error context
                exc_value.args = [exc_value, ErrContext()]
                logger.error(context)
            except Exception:
                # Parsing is best-effort only; `Exception` (rather than a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                logger.warn((
                    "Omniduct was unable to parse the database error messages. Refer to the "
                    "traceback below for full error details."))

            if isinstance(exc_type, type):
                exc_type = exc_type(exc_value)

            raise_with_traceback(exc_type, exc_traceback)
コード例 #5
0
ファイル: duct.py プロジェクト: yunstanford/omniduct
 def __setattr__(self, key, value):
     """
     Set attribute `key` to `value`, disconnecting first when required.

     When the `_Duct__prepared` flag is truthy, `connection_fields` is
     non-empty and contains `key`, and `is_connected()` is truthy, a warning
     is logged, `disconnect()` is called and the prepared flag is reset
     before the assignment. Any `AttributeError` raised while doing so is
     suppressed, and the assignment itself always goes through
     `object.__setattr__`.
     """
     try:
         prepared = getattr(self, '_Duct__prepared', False)
         if prepared and getattr(self, 'connection_fields', None):
             if key in self.connection_fields and self.is_connected():
                 logger.warn('Disconnecting prior to changing field that connection is based on: {}.'.format(key))
                 self.disconnect()
                 # NOTE(review): relies on private-name mangling by the
                 # enclosing class (presumably `Duct`) -- confirm.
                 self.__prepared = False
     except AttributeError:
         pass
     object.__setattr__(self, key, value)
コード例 #6
0
ファイル: duct.py プロジェクト: djKooks/omniduct
 def __setattr__(self, key, value):
     """
     Assign `value` to attribute `key` on this object.

     Before assigning, checks in order: the `_Duct__prepared` flag, a
     non-empty `connection_fields`, membership of `key` in it, and
     `is_connected()`. If all are truthy, logs a warning, calls
     `disconnect()` and clears the prepared flag. An `AttributeError`
     raised during this step is ignored; the assignment is then performed
     with `object.__setattr__`.
     """
     try:
         must_disconnect = (
             getattr(self, '_Duct__prepared', False)
             and getattr(self, 'connection_fields', None)
             and key in self.connection_fields
             and self.is_connected()
         )
         if must_disconnect:
             warning = 'Disconnecting prior to changing field that connection is based on: {}.'.format(key)
             logger.warn(warning)
             self.disconnect()
             # NOTE(review): relies on private-name mangling by the
             # enclosing class (presumably `Duct`) -- confirm.
             self.__prepared = False
     except AttributeError:
         pass
     object.__setattr__(self, key, value)
コード例 #7
0
    def _execute(self, statement, cursor, wait, session_properties):
        """
        Run `statement` and return the cursor that executed it.

        If something goes wrong, `PrestoClient` will attempt to parse the error
        log and present the user with useful debugging information. If that fails,
        the full traceback will be raised instead.

        Parameters:
            statement (str): The statement to execute.
            cursor: An existing cursor to reuse. When falsy, a new
                `pyhive.presto.Cursor` is created from this object's
                connection attributes.
            wait (bool): When truthy, keep polling the cursor until it
                reports the state "FINISHED" (or returns no status), while
                reporting progress through `logger.progress`.
            session_properties: Passed to a newly created cursor as
                `session_props`.

        Returns:
            The cursor on which `statement` was executed.

        Raises:
            An exception of the same type as the caught database error, built
            from the original exception and re-raised through
            `raise_with_traceback` with the original traceback.
        """
        from pyhive import presto  # Imported here due to slow import performance in Python 3
        from pyhive.exc import DatabaseError  # Imported here due to slow import performance in Python 3
        try:
            cursor = cursor or presto.Cursor(host=self.host,
                                             port=self.port,
                                             username=self.username,
                                             password=self.password,
                                             catalog=self.catalog,
                                             schema=self.schema,
                                             session_props=session_properties,
                                             poll_interval=1,
                                             source=self.source,
                                             protocol=self.server_protocol)
            cursor.execute(statement)
            status = cursor.poll()
            if wait:
                logger.progress(0)
                # status None means command executed successfully
                # See https://github.com/dropbox/PyHive/blob/master/pyhive/presto.py#L234
                while status is not None and status['stats']['state'] != "FINISHED":
                    if status['stats'].get('totalSplits', 0) > 0:
                        pct_complete = round(
                            status['stats']['completedSplits'] /
                            float(status['stats']['totalSplits']), 4)
                        logger.progress(pct_complete * 100)
                    status = cursor.poll()
                logger.progress(100, complete=True)
            return cursor
        except (DatabaseError, pandas.io.sql.DatabaseError) as e:
            # Attempt to parse database error, before ultimately reraising the same
            # exception, maintaining the full stacktrace.
            exc_type, exc_value, exc_traceback = sys.exc_info()

            try:
                message = e.args[0]
                if isinstance(message, six.string_types):
                    # The error payload is a dict literal embedded in the text.
                    message = ast.literal_eval(
                        re.match(r"[^{]*({.*})[^}]*$", message).group(1))

                linenumber = message['errorLocation']['lineNumber'] - 1
                splt = statement.splitlines()
                splt[linenumber] += (
                    '   <--  {errorType} ({errorName}) occurred. {message} '.format(**message))
                # Show the offending line with one line of context either side.
                first_shown = max(linenumber - 1, 0)
                end_shown = min(linenumber + 2, len(splt))
                context = '\n\n[Error Context]\n{}\n'.format('\n'.join([
                    splt[i] for i in range(first_shown, end_shown)
                ]))

                class ErrContext(object):
                    def __repr__(self):
                        return context

                # logged twice so that both notebook and console users see the error context
                exc_value.args = [exc_value, ErrContext()]
                logger.error(context)
            except Exception:
                # Parsing is best-effort only; `Exception` (rather than a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                logger.warn((
                    "Omniduct was unable to parse the database error messages. Refer to the "
                    "traceback below for full error details."))

            if isinstance(exc_type, type):
                exc_type = exc_type(exc_value)

            raise_with_traceback(exc_type, exc_traceback)