Esempio n. 1
0
    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Wrap a PEP 249 connection and start the background worker thread.

           Arguments:
           - connection: the connection to wrap. Its cursor() method is called
             exactly once; the resulting cursor is the one queued statements
             are executed on.
           - stmtcachesize: the size given to the FIFODict caching translated
             statements. Only used when the paramstyle is not 'pyformat'.
           - paramstyle: the name of the connection's paramstyle. If None,
             the paramstyle attribute of the underlying module is used.

           Raises InterfaceError if this object has no
           _translate2<paramstyle> method.
        """
        self.__connection = connection
        # Create the cursor once. Creating a second cursor later on would
        # only orphan this one without ever closing it.
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        self.__underlyingmodule = None  # will be updated next
        # getunderlyingmodule is defined elsewhere; it is expected to set
        # self.__underlyingmodule as a side effect.
        self.getunderlyingmodule()

        if paramstyle is None:
            paramstyle = self.__underlyingmodule.paramstyle

        if paramstyle != 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            # 'pyformat' statements are passed on untranslated
            self.__translate = None

        # Thread-stuff: statements are handed to the worker through a queue
        # created with the argument 5000. The thread is a daemon so it does
        # not keep the interpreter alive.
        self.__queue = Queue(5000)
        t = Thread(target=self.__worker)
        t.daemon = True
        t.start()
    def __init__(self, jdbcconn, stmtcachesize=20):
        """Wrap an open JDBC connection in a ConnectionWrapper.

           The new wrapper becomes the default ConnectionWrapper when no
           default has been set yet.

           Arguments:
           - jdbcconn: An open JDBC Connection (not a PEP249 Connection)
           - stmtcachesize: The maximum number of PreparedStatements kept
             open. Default: 20.

           Raises TypeError if jdbcconn is not a jdbc.Connection and
           ValueError if jdbcconn reports that it is closed.
        """
        if not isinstance(jdbcconn, jdbc.Connection):
            raise TypeError('1st argument must implement java.sql.Connection')
        if jdbcconn.isClosed():
            raise ValueError('1st argument must be an open Connection')

        def closeevicted(k, v):
            # Finalizer for __prepstmts: close the PreparedStatement (the
            # first element of the cached value) when it is pushed out
            return v[0].close()

        self.__jdbcconn = jdbcconn
        self.__prepstmts = FIFODict(stmtcachesize, closeevicted)
        self.__resultmeta = FIFODict(stmtcachesize)
        # No result is available until a statement has been executed
        self.__resultset = self.__resultnames = self.__resulttypes = None
        self.nametranslator = lambda s: s
        jdbcconn.setAutoCommit(False)
        if pygrametl._defaulttargetconnection is None:
            pygrametl._defaulttargetconnection = self
Esempio n. 3
0
    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Wrap the given PEP 249 connection in a ConnectionWrapper.

           The new ConnectionWrapper becomes the default one if no default
           ConnectionWrapper exists yet.

           Arguments:
           - connection: An open PEP 249 connection to the database
           - stmtcachesize: How many translated statements to cache.
             Statements are only translated when the paramstyle is not
             'pyformat'; for 'pyformat' this argument is ignored.
           - paramstyle: The name of the connection's paramstyle. If None,
             it is looked up on the module the connection's class comes from
             (an AttributeError can be raised if that fails).
        """
        global _defaulttargetconnection

        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        if paramstyle is None:
            modname = self.__connection.__class__.__module__
            try:
                paramstyle = modules[modname].paramstyle
            except AttributeError:
                # Note: This is probably a better way to do this, but to avoid
                # to break anything that worked before this fix, we only do it
                # this way if the first approach didn't work
                try:
                    toplevel = modname.split('.')[0]
                    paramstyle = modules[toplevel].paramstyle
                except AttributeError:
                    # To support, e.g., mysql.connector connections
                    parent = modname.rsplit('.', 1)[0]
                    paramstyle = modules[parent].paramstyle

        if paramstyle == 'pyformat':
            # Nothing has to be translated
            self.__translate = None
        else:
            self.__translations = FIFODict(stmtcachesize)
            translatorname = '_translate2' + paramstyle
            try:
                self.__translate = getattr(self, translatorname)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)

        if _defaulttargetconnection is None:
            _defaulttargetconnection = self
Esempio n. 4
0
    def __init__(self, connection, stmtcachesize=1000, paramstyle=None,
                 copyintonew=False):
        """Wrap the given PEP 249 connection in a ConnectionWrapper.

           The new ConnectionWrapper becomes the default one if no default
           ConnectionWrapper exists yet.

           Arguments:

           - connection: An open PEP 249 connection to the database
           - stmtcachesize: How many translated statements to cache. It is
             ignored when the paramstyle is 'pyformat' and copyintonew is
             False since nothing is translated in that case.
           - paramstyle: The name of the connection's paramstyle. If None,
             the paramstyle attribute of the underlying module is used
             (an AttributeError can be raised if that fails).
           - copyintonew: A boolean deciding if a new mapping only holding the
             needed arguments should be created when a statement is executed.
             Some drivers require this.
        """
        global _defaulttargetconnection

        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        self.__underlyingmodule = None  # set by the call below
        self.getunderlyingmodule()  # updates self.__underlyingmodule

        if paramstyle is None:
            paramstyle = self.__underlyingmodule.paramstyle

        if not copyintonew and paramstyle == 'pyformat':
            # Neither a different paramstyle nor a copied mapping is
            # requested, so statements are used as they are
            self.__translate = None
        else:
            self.__translations = FIFODict(stmtcachesize)
            translatorname = '_translate2' + paramstyle
            try:
                self.__translate = getattr(self, translatorname)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)

        self.__paramstyle = paramstyle
        self.__copyintonew = copyintonew

        if _defaulttargetconnection is None:
            _defaulttargetconnection = self
Esempio n. 5
0
    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Wrap a PEP 249 connection and start the background worker thread.

        Arguments:
        - connection: the connection to wrap. Its cursor() method is called
          exactly once; the resulting cursor is the one queued statements
          are executed on.
        - stmtcachesize: the size given to the FIFODict caching translated
          statements. Only used when the paramstyle is not "pyformat".
        - paramstyle: the name of the connection's paramstyle. If None, it is
          looked up on the module the connection's class comes from (an
          AttributeError is raised if none of the lookups succeeds).

        Raises InterfaceError if this object has no _translate2<paramstyle>
        method.
        """
        self.__connection = connection
        # Create the cursor once. Creating a second cursor later on would
        # only orphan this one without ever closing it.
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        if paramstyle is None:
            modname = self.__connection.__class__.__module__
            try:
                paramstyle = modules[modname].paramstyle
            except AttributeError:
                # Note: This is probably a better way to do this, but to avoid
                # to break anything that worked before this fix, we only do it
                # this way if the first approach didn't work
                try:
                    paramstyle = modules[modname.split(".")[0]].paramstyle
                except AttributeError:
                    # To support, e.g., mysql.connector connections
                    paramstyle = modules[modname.rsplit(".", 1)[0]].paramstyle

        if paramstyle != "pyformat":
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, "_translate2" + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" % paramstyle)
        else:
            # "pyformat" statements are passed on untranslated
            self.__translate = None

        # Thread-stuff: statements are handed to the worker through a queue
        # created with the argument 5000. The thread is a daemon so it does
        # not keep the interpreter alive.
        self.__queue = Queue(5000)
        t = Thread(target=self.__worker)
        t.daemon = True
        t.start()
Esempio n. 6
0
    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Wrap a PEP 249 connection and start the background worker thread.

           Arguments:
           - connection: the connection to wrap. Its cursor() method is called
             exactly once; the resulting cursor is the one queued statements
             are executed on.
           - stmtcachesize: the size given to the FIFODict caching translated
             statements. Only used when the paramstyle is not 'pyformat'.
           - paramstyle: the name of the connection's paramstyle. If None,
             the paramstyle attribute of the underlying module is used.

           Raises InterfaceError if this object has no
           _translate2<paramstyle> method.
        """
        self.__connection = connection
        # A single cursor is enough: replacing it further down would leave
        # the first one dangling and unclosed.
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        self.__underlyingmodule = None  # will be updated next
        # getunderlyingmodule is defined elsewhere; it is expected to set
        # self.__underlyingmodule as a side effect.
        self.getunderlyingmodule()

        if paramstyle is None:
            paramstyle = self.__underlyingmodule.paramstyle

        if paramstyle != 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            # 'pyformat' statements are passed on untranslated
            self.__translate = None

        # Thread-stuff: the daemon worker thread takes the statements to
        # execute from this queue (created with the argument 5000).
        self.__queue = Queue(5000)
        t = Thread(target=self.__worker)
        t.daemon = True
        t.start()
Esempio n. 7
0
 def __init__(self, jdbcconn, stmtcachesize=20):
     """Wrap the given JDBC connection and start the background worker.

        Arguments:
        - jdbcconn: the JDBC connection to wrap; auto-commit is switched
          off on it.
        - stmtcachesize: the size given to the FIFODicts caching prepared
          statements and result metadata. Default: 20.
     """
     def closeevicted(k, v):
         # Finalizer for __prepstmts: close the PreparedStatement (the
         # first element of the cached value) when it is pushed out
         return v[0].close()

     self.__jdbcconn = jdbcconn
     self.__prepstmts = FIFODict(stmtcachesize, closeevicted)
     self.__resultmeta = FIFODict(stmtcachesize)
     # No result is available until a statement has been executed
     self.__resultset = self.__resultnames = self.__resulttypes = None
     self.nametranslator = lambda s: s
     jdbcconn.setAutoCommit(False)

     # The worker thread takes its work from this queue
     self.__queue = Queue(5000)
     worker = Thread(target=self.__worker)
     worker.setDaemon(True)  # NB: "worker.daemon = True" does NOT work...
     worker.start()
Esempio n. 8
0
    def __init__(self, jdbcconn, stmtcachesize=20):
        """Wrap the given JDBC connection in a ConnectionWrapper.

           The new wrapper becomes the default ConnectionWrapper when no
           default has been set yet.

           Arguments:
           - jdbcconn: the JDBC connection to wrap; auto-commit is switched
             off on it.
           - stmtcachesize: the size given to the FIFODicts caching prepared
             statements and result metadata. Default: 20.
        """
        def closeevicted(k, v):
            # Finalizer for __prepstmts: close the PreparedStatement (the
            # first element of the cached value) when it is pushed out
            return v[0].close()

        self.__jdbcconn = jdbcconn
        self.__prepstmts = FIFODict(stmtcachesize, closeevicted)
        self.__resultmeta = FIFODict(stmtcachesize)
        # No result is available until a statement has been executed
        self.__resultset = self.__resultnames = self.__resulttypes = None
        self.nametranslator = lambda s: s
        jdbcconn.setAutoCommit(False)
        if pygrametl._defaulttargetconnection is None:
            pygrametl._defaulttargetconnection = self
Esempio n. 9
0
    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Wrap the given PEP 249 connection in a ConnectionWrapper.

           The new ConnectionWrapper becomes the default one if no default
           ConnectionWrapper exists yet.

           Arguments:
           - connection: An open PEP 249 connection to the database
           - stmtcachesize: How many translated statements to cache.
             Statements are only translated when the paramstyle is not
             'pyformat'; for 'pyformat' this argument is ignored.
           - paramstyle: The name of the connection's paramstyle. If None,
             it is looked up on the module the connection's class comes from
             (an AttributeError can be raised if that fails).
        """
        global _defaulttargetconnection

        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        if paramstyle is None:
            modname = self.__connection.__class__.__module__
            try:
                paramstyle = modules[modname].paramstyle
            except AttributeError:
                # Note: This is probably a better way to do this, but to avoid
                # to break anything that worked before this fix, we only do it
                # this way if the first approach didn't work
                try:
                    toplevel = modname.split('.')[0]
                    paramstyle = modules[toplevel].paramstyle
                except AttributeError:
                    # To support, e.g., mysql.connector connections
                    parent = modname.rsplit('.', 1)[0]
                    paramstyle = modules[parent].paramstyle

        if paramstyle == 'pyformat':
            # Nothing has to be translated
            self.__translate = None
        else:
            self.__translations = FIFODict(stmtcachesize)
            translatorname = '_translate2' + paramstyle
            try:
                self.__translate = getattr(self, translatorname)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)

        if _defaulttargetconnection is None:
            _defaulttargetconnection = self
Esempio n. 10
0
    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Wrap a PEP 249 connection and start the background worker thread.

           Arguments:
           - connection: the connection to wrap. Its cursor() method is called
             exactly once; the resulting cursor is the one queued statements
             are executed on.
           - stmtcachesize: the size given to the FIFODict caching translated
             statements. Only used when the paramstyle is not 'pyformat'.
           - paramstyle: the name of the connection's paramstyle. If None, it
             is looked up on the module the connection's class comes from (an
             AttributeError is raised if none of the lookups succeeds).

           Raises InterfaceError if this object has no
           _translate2<paramstyle> method.
        """
        self.__connection = connection
        # Create the cursor once. Creating a second cursor later on would
        # only orphan this one without ever closing it.
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        if paramstyle is None:
            modname = self.__connection.__class__.__module__
            try:
                paramstyle = modules[modname].paramstyle
            except AttributeError:
                # Note: This is probably a better way to do this, but to avoid
                # to break anything that worked before this fix, we only do it
                # this way if the first approach didn't work
                try:
                    paramstyle = modules[modname.split('.')[0]].paramstyle
                except AttributeError:
                    # To support, e.g., mysql.connector connections
                    paramstyle = \
                        modules[modname.rsplit('.', 1)[0]].paramstyle

        if paramstyle != 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            # 'pyformat' statements are passed on untranslated
            self.__translate = None

        # Thread-stuff: statements are handed to the worker through a queue
        # created with the argument 5000. The thread is a daemon so it does
        # not keep the interpreter alive.
        self.__queue = Queue(5000)
        t = Thread(target=self.__worker)
        t.daemon = True
        t.start()
Esempio n. 11
0
    def __init__(self, jdbcconn, stmtcachesize=20):
        """Wrap the given JDBC connection and start the background worker.

           Arguments:
           - jdbcconn: An open JDBC Connection (not a PEP249 Connection)
           - stmtcachesize: The maximum number of PreparedStatements kept
             open. Default: 20.
        """
        def closeevicted(k, v):
            # Finalizer for __prepstmts: close the PreparedStatement (the
            # first element of the cached value) when it is pushed out
            return v[0].close()

        self.__jdbcconn = jdbcconn
        self.__prepstmts = FIFODict(stmtcachesize, closeevicted)
        self.__resultmeta = FIFODict(stmtcachesize)
        # No result is available until a statement has been executed
        self.__resultset = self.__resultnames = self.__resulttypes = None
        self.nametranslator = lambda s: s
        jdbcconn.setAutoCommit(False)

        # The named worker thread takes its work from this queue
        self.__queue = Queue(5000)
        worker = Thread(target=self.__worker)
        worker.setDaemon(True)  # NB: "worker.daemon = True" does NOT work...
        worker.setName('BackgroundJDBCConnectionWrapper')
        worker.start()
Esempio n. 12
0
    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Wrap the given PEP 249 connection in a ConnectionWrapper.

           The new ConnectionWrapper becomes the default one if no default
           ConnectionWrapper exists yet.

           Arguments:
           - connection: An open PEP 249 connection to the database
           - stmtcachesize: How many translated statements to cache.
             Statements are only translated when the paramstyle is not
             'pyformat'; for 'pyformat' this argument is ignored.
           - paramstyle: The name of the connection's paramstyle. If None,
             the paramstyle attribute of the underlying module is used
             (an AttributeError can be raised if that fails).
        """
        global _defaulttargetconnection

        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        self.__underlyingmodule = None  # set by the call below
        self.getunderlyingmodule()  # updates self.__underlyingmodule

        if paramstyle is None:
            paramstyle = self.__underlyingmodule.paramstyle

        if paramstyle == 'pyformat':
            # Nothing has to be translated
            self.__translate = None
        else:
            self.__translations = FIFODict(stmtcachesize)
            translatorname = '_translate2' + paramstyle
            try:
                self.__translate = getattr(self, translatorname)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)

        if _defaulttargetconnection is None:
            _defaulttargetconnection = self
Esempio n. 13
0
class BackgroundConnectionWrapper(object):
    """An alternative implementation of the ConnectionWrapper for experiments.
       This implementation communicates with the database by using a
       separate thread.

       It is likely better to use ConnectionWrapper or a shared
       ConnectionWrapper (see pygrametl.parallel).

       This class offers the same methods as ConnectionWrapper. The
       documentation is not repeated here.

       Statements given to execute/executemany are put on a queue and run by
       a worker thread. All methods reading results wait for the queue to be
       emptied first. If a queued statement raised an exception in the worker
       thread, the first such exception is re-raised by the next method that
       waits for the queue.
    """
    # Operation codes put on the queue to tell the worker what to call
    _SINGLE = 1
    _MANY = 2

    # Most of this class' code was just copied from ConnectionWrapper
    # as we just want to do experiments with this class.

    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Wrap the given PEP 249 connection and start the worker thread.

           Arguments:
           - connection: the connection to wrap
           - stmtcachesize: the size given to the FIFODict caching translated
             statements. Only used when the paramstyle is not 'pyformat'.
           - paramstyle: the name of the connection's paramstyle. If None, it
             is looked up on the module the connection's class comes from (an
             AttributeError is raised if none of the lookups succeeds).

           Raises InterfaceError if there is no _translate2<paramstyle>
           method.
        """
        self.__connection = connection
        # Create the cursor once. Creating a second cursor later on would
        # only orphan this one without ever closing it.
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        if paramstyle is None:
            try:
                paramstyle = \
                    modules[self.__connection.__class__.__module__].paramstyle
            except AttributeError:
                # Note: This is probably a better way to do this, but to avoid
                # to break anything that worked before this fix, we only do it
                # this way if the first approach didn't work
                try:
                    paramstyle = \
                        modules[self.__connection.__class__.__module__.
                                split('.')[0]].paramstyle
                except AttributeError:
                    # To support, e.g., mysql.connector connections
                    paramstyle = \
                        modules[self.__connection.__class__.__module__.
                                rsplit('.', 1)[0]].paramstyle

        if not paramstyle == 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            self.__translate = None

        # Thread-stuff
        self.__startworker()

    def __startworker(self):
        # Create the work queue and start the daemon thread consuming it.
        self.__workererror = None  # first exception raised in the worker
        self.__queue = Queue(5000)
        t = Thread(target=self.__worker)
        t.daemon = True
        t.start()

    def __sync(self):
        # Wait until the worker has handled everything on the queue and
        # re-raise (once) an exception that occurred in the worker.
        self.__queue.join()
        error = self.__workererror
        if error is not None:
            self.__workererror = None
            raise error

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        """Queue stmt for execution with the given arguments.

           If both namemapping and arguments are given, the arguments are
           first passed through copy(arguments, **namemapping). The statement
           is translated unless translate is false or the paramstyle is
           'pyformat'. The method returns without waiting for the execution.
        """
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)
        self.__queue.put((self._SINGLE, self.__cursor, stmt, arguments))

    def executemany(self, stmt, params, translate=True):
        """Queue stmt for execution once for each parameter set in params.

           When the statement has to be translated, params[0] is used for the
           translation, so params must then be an indexable, non-empty
           sequence.
        """
        if self.__translate and translate:
            # Idea: Translate the statement for the first parameter set. Then
            # reuse the statement (but create new attribute sequences if
            # needed) for the remaining parameter sets
            newstmt = self.__translate(stmt, params[0])[0]
            if isinstance(self.__translations[stmt], str):
                # The paramstyle is 'named' in this case and we don't have to
                # put parameters into sequences
                self.__queue.put((self._MANY, self.__cursor, newstmt, params))
            else:
                # We need to extract attributes and put them into sequences
                # The attributes to extract
                names = self.__translations[stmt][1]
                newparams = [[p[n] for n in names] for p in params]
                self.__queue.put(
                    (self._MANY, self.__cursor, newstmt, newparams))
        else:
            # for pyformat when no translation is necessary
            self.__queue.put((self._MANY, self.__cursor, stmt, params))

    def _translate2named(self, stmt, row=None):
        """Return (stmt with each %(name)s replaced by :name, row).

           row is returned unchanged. Only the translated SQL is cached.
        """
        res = self.__translations.get(stmt, None)
        if res:
            return (res, row)
        res = stmt
        while True:
            start = res.find('%(')
            if start == -1:
                break
            end = res.find(')s', start)
            name = res[start + 2:end]
            # All occurrences of this placeholder are replaced at once
            res = res.replace(res[start:end + 2], ':' + name)
        self.__translations[stmt] = res
        return (res, row)

    def _translate2qmark(self, stmt, row=None):
        """Return (stmt with each %(name)s replaced by ?, list of values).

           The values are taken from row in the order the placeholders occur.
           The translated SQL and the placeholder names are cached.
        """
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            name = newstmt[start + 2:end]
            names.append(name)
            newstmt = newstmt.replace(newstmt[start:end + 2], '?',
                                      1)  # Replace once!
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2numeric(self, stmt, row=None):
        """Return (stmt with %(name)s replaced by :0, :1, ..., list of values).

           The values are taken from row in the order the distinct
           placeholders first occur. The translated SQL and the placeholder
           names are cached.
        """
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        # NOTE(review): numbering starts at 0 while the PEP 249 example for
        # 'numeric' uses :1 - confirm this is what the target drivers expect.
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            name = newstmt[start + 2:end]
            names.append(name)
            # All occurrences of this placeholder get the same number
            newstmt = newstmt.replace(newstmt[start:end + 2], ':' + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        """Return (stmt with each %(name)s replaced by %s, list of values).

           The values are taken from row in the order the placeholders occur.
           The translated SQL and the placeholder names are cached.
        """
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            name = newstmt[start + 2:end]
            names.append(name)
            newstmt = newstmt.replace(newstmt[start:end + 2], '%s',
                                      1)  # Replace once!
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def rowfactory(self, names=None):
        """Return what rowfactory(cursor, names, True) gives for the result.

           The current cursor is handed over and replaced by a new one. If
           names is None, the translated column names from the cursor's
           description are used; an empty generator is returned if there is
           no description.
        """
        self.__sync()
        rows = self.__cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        """Return the next result row as a dict.

           {} is returned if the cursor has no description. If there are no
           more rows, a dict mapping each name to None is returned.
        """
        self.__sync()
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [
                self.nametranslator(t[0]) for t in self.__cursor.description
            ]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict([(n, None) for n in names])
        else:
            return dict(zip(names, values))

    def fetchonetuple(self):
        """Return the next result row as given by the cursor.

           () is returned if the cursor has no description. If there are no
           more rows, a tuple holding None for each column is returned.
        """
        self.__sync()
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None, ) * len(self.__cursor.description)
        else:
            return values

    def fetchmanytuples(self, cnt):
        """Return the cursor's fetchmany(cnt), or [] without a description."""
        self.__sync()
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        """Yield all remaining result rows, fetched 200 at a time."""
        self.__sync()
        if self.__cursor.description is not None:
            while True:
                results = self.__cursor.fetchmany(200)
                if not results:
                    break
                for row in results:
                    yield row

    def rowcount(self):
        """Return the cursor's rowcount after the queue has been emptied."""
        self.__sync()
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        """Return the module the wrapped connection's class is defined in."""
        # No need to join the queue here
        return modules[self.__connection.__class__.__module__]

    def commit(self):
        """Call endload(), wait for the queue and commit the connection."""
        endload()
        self.__sync()
        self.__connection.commit()

    def close(self):
        """Wait for the queue and close the connection.

           The connection is closed even if a queued statement failed.
        """
        try:
            self.__sync()
        finally:
            self.__connection.close()

    def rollback(self):
        """Wait for the queue and roll the connection back.

           The rollback is done even if a queued statement failed.
        """
        try:
            self.__sync()
        finally:
            self.__connection.rollback()

    def setasdefault(self):
        """Make this object the default target connection."""
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        """Return a new cursor from the connection after emptying the queue."""
        self.__sync()
        return self.__connection.cursor()

    def resultnames(self):
        """Return a tuple of the result's column names (None if no result)."""
        self.__sync()
        if self.__cursor.description is None:
            return None
        else:
            return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the wrapper is pickled (to be sent to another process),
        # the cursor and the queue are left out; __setstate__ recreates
        # them. The keys are the name-mangled attribute names of this class.
        res = self.__dict__.copy()
        for name in ('cursor', 'queue'):
            res.pop('_BackgroundConnectionWrapper__' + name, None)
        return res

    def __setstate__(self, dictdata):
        # Restore the attributes, then create a new cursor, a new queue and
        # a new worker thread for the restored object.
        self.__dict__.update(dictdata)
        self.__cursor = self.__connection.cursor()
        self.__startworker()

    def __worker(self):
        # Runs in the background thread: execute the queued statements one
        # by one, forever.
        while True:
            (op, curs, stmt, args) = self.__queue.get()
            try:
                if op == self._SINGLE:
                    curs.execute(stmt, args)
                elif op == self._MANY:
                    curs.executemany(stmt, args)
            except Exception as e:
                # Keep the first failure so __sync can re-raise it in the
                # calling thread; the worker keeps running.
                if self.__workererror is None:
                    self.__workererror = e
            finally:
                # Always mark the item as done. Otherwise a failing
                # statement would make every later join() block forever.
                self.__queue.task_done()
Esempio n. 14
0
class ConnectionWrapper(object):

    """Provide a uniform representation of different database connection types.

       All Dimensions and FactTables communicate with the data warehouse using
       a ConnectionWrapper. In this way, the code for loading the DW does not
       have to care about which parameter format is used.

       pygrametl's code uses the 'pyformat' but the ConnectionWrapper performs
       translations of the SQL to use 'named', 'qmark', 'format', or 'numeric'
       if the user's database connection needs this. Note that the
       translations are simple and naive. Escaping as in %%(name)s is not
       taken into consideration. These simple translations are enough for
       pygrametl's code which is the important thing here; we're not trying to
       make a generic, all-purpose tool to get rid of the problems with
       different parameter formats. It is, however, possible to disable the
       translation of a statement to execute such that 'problematic'
       statements can be executed anyway.
    """

    def __init__(self, connection, stmtcachesize=1000, paramstyle=None,
                 copyintonew=False):
        """Create a ConnectionWrapper around the given PEP 249 connection

           If no default ConnectionWrapper already exists, the new
           ConnectionWrapper is set as the default.

           Arguments:

           - connection: An open PEP 249 connection to the database
           - stmtcachesize: A number deciding how many translated statements to
             cache. A statement needs to be translated when the connection
             does not use 'pyformat' to specify parameters. When 'pyformat' is
             used and copyintonew == False, stmtcachesize is ignored as no
             statements need to be translated.
           - paramstyle: A string holding the name of the PEP 249 connection's
             paramstyle. If None, pygrametl will try to find the paramstyle
             automatically (an AttributeError can be raised if that fails).
           - copyintonew: A boolean deciding if a new mapping only holding the
             needed arguments should be created when a statement is executed.
             Some drivers require this.
        """
        global _defaulttargetconnection

        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        self.__underlyingmodule = None  # will be updated next
        self.getunderlyingmodule()  # updates self.__underlyingmodule

        if paramstyle is None:
            # Raises AttributeError if no module was found above
            paramstyle = self.__underlyingmodule.paramstyle

        if copyintonew or paramstyle != 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            # Since paramstyle == 'pyformat' and copyintonew == False,
            # no translations are needed
            self.__translate = None

        self.__paramstyle = paramstyle
        self.__copyintonew = copyintonew

        if _defaulttargetconnection is None:
            _defaulttargetconnection = self

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        """Execute a statement.

           Arguments:

           - stmt: the statement to execute
           - arguments: a mapping with the arguments (default: None)
           - namemapping: a mapping of names such that if stmt uses %(arg)s
             and namemapping[arg]=arg2, the value arguments[arg2] is used
             instead of arguments[arg]
           - translate: decides if translation from 'pyformat' to the
             underlying connection's format should take place. Default: True
        """
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)

        if arguments is None:
            # Some drivers don't accept None for 'arguments'
            self.__cursor.execute(stmt)
        else:
            self.__cursor.execute(stmt, arguments)

    def executemany(self, stmt, params, translate=True):
        """Execute a statement once for each parameter set in params.

           Arguments:

           - stmt: the statement to execute
           - params: a sequence of mappings with the arguments
           - translate: decides if translation from 'pyformat' to the
             underlying connection's format should take place. Default: True

           If translation is to be done and params is empty, nothing is
           executed as there is no parameter set to translate for.
        """
        if self.__translate and translate:
            if len(params) == 0:
                # params[0] below would raise an IndexError
                return
            # Idea: Translate the statement for the first parameter set. Then
            # reuse the statement (but create new attribute sequences if
            # needed) for the remaining parameter sets
            (newstmt, _) = self.__translate(stmt, params[0])
            names = self.__translations[stmt][1]

            if self.__paramstyle in ('pyformat', 'named'):
                if self.__copyintonew:
                    # we need to copy attributes from params into new dicts
                    newparams = [{n: p[n] for n in names} for p in params]
                else:
                    newparams = params
            else:
                # We need to extract attributes and put them into sequences
                newparams = [[p[n] for n in names] for p in params]
        else:
            # nothing to do for pyformat when no translation is necessary
            newstmt = stmt
            newparams = params

        self.__cursor.executemany(newstmt, newparams)

    def _translate2pyformat(self, stmt, row=None):
        """Return stmt unchanged with a new dict of the needed arguments.

           No translation of stmt is needed. This method is only used if a
           new row only containing the required attributes must be made.
           The names found in stmt are cached. None is returned instead of
           a dict if stmt has no arguments.
        """
        (_, names) = self.__translations.get(stmt, (None, None))
        if names is None:
            names = []
            end = 0
            while True:
                start = stmt.find('%(', end)
                if start == -1:
                    break
                end = stmt.find(')s', start)
                if end == -1:
                    break
                names.append(stmt[start + 2:end])
            self.__translations[stmt] = (stmt, names)
        if names:
            return (stmt, {n: row[n] for n in names})
        # there are no arguments to copy
        return (stmt, None)

    def _translate2named(self, stmt, row=None):
        """Translate %(name)s to :name and return (SQL, arguments).

           The translated SQL and the list of attributes to extract are
           cached. A new row only holding the needed attributes (or None if
           there are none) is made if self.__copyintonew. Otherwise row is
           returned as it is.
        """
        (res, names) = self.__translations.get(stmt, (None, None))
        if not res:
            names = []
            res = stmt
            while True:
                start = res.find('%(')
                if start == -1:
                    break
                end = res.find(')s', start)
                if end == -1:
                    break
                name = res[start + 2:end]
                # All occurrences of this name are replaced at once
                res = res.replace(res[start:end + 2], ':' + name)
                names.append(name)
            self.__translations[stmt] = (res, names)
        if not self.__copyintonew:
            return (res, row)
        if names:
            return (res, {n: row[n] for n in names})
        return (res, None)

    def __translatepositional(self, stmt, row, marker):
        # Replace each %(name)s in stmt by marker (one occurrence at a time)
        # and build a list of the attributes to extract from row, in order
        # of occurrence. Both the new statement and the list are cached.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if not newstmt:
            names = []
            newstmt = stmt
            while True:
                start = newstmt.find('%(')
                if start == -1:
                    break
                end = newstmt.find(')s', start)
                if end == -1:
                    break
                names.append(newstmt[start + 2:end])
                newstmt = newstmt.replace(
                    newstmt[start:end + 2], marker, 1)  # Replace once!
            self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2qmark(self, stmt, row=None):
        """Translate %(name)s to ? and return (SQL, list of values).

           The values are extracted from row in the order the names occur
           in stmt. The translated SQL and the names are cached.
        """
        return self.__translatepositional(stmt, row, '?')

    def _translate2numeric(self, stmt, row=None):
        """Translate %(name)s to :0, :1, ... and return (SQL, values).

           All occurrences of a name get the same number and the name's
           value is only extracted once from row. The translated SQL and
           the names are cached.
        """
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break
            name = newstmt[start + 2:end]
            names.append(name)
            newstmt = newstmt.replace(newstmt[start:end + 2], ':' + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        """Translate %(name)s to %s and return (SQL, list of values).

           The values are extracted from row in the order the names occur
           in stmt. The translated SQL and the names are cached.
        """
        return self.__translatepositional(stmt, row, '%s')

    def rowfactory(self, names=None):
        """Return a generator object returning result rows (i.e. dicts).

           The current cursor is handed over to the generator and the
           ConnectionWrapper gets a new cursor for later statements.
        """
        rows = self.__cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        """Return one result row (i.e. dict).

           An empty dict is returned if there is no result set. If there
           are no more rows, a dict mapping each name to None is returned.
        """
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [self.nametranslator(t[0])
                     for t in self.__cursor.description]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict.fromkeys(names)
        return dict(zip(names, values))

    def fetchonetuple(self):
        """Return one result tuple.

           An empty tuple is returned if there is no result set. If there
           are no more rows, a tuple with a None for each column is returned.
        """
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None, ) * len(self.__cursor.description)
        return values

    def fetchmanytuples(self, cnt):
        """Return cnt result tuples."""
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        """Return all result tuples (a generator fetching 200 at a time)."""
        if self.__cursor.description is not None:
            while True:
                results = self.__cursor.fetchmany(200)
                if not results:
                    break
                for row in results:
                    yield row

    def rowcount(self):
        """Return the size of the result."""
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        """Return a reference to the underlying connection's module.

           This is done by considering the prefixes of the connection's
           __class__'s __module__ string from the shortest to the longest
           (e.g., 'a', 'a.b', 'a.b.c') and returning the first loaded module
           that has the attributes 'paramstyle' and 'connect'. The result is
           remembered. None is returned if no such module is found.
        """
        if self.__underlyingmodule is not None:
            return self.__underlyingmodule

        fullmodname = self.__connection.__class__.__module__
        for i in reversed(range(fullmodname.count('.') + 1)):
            # rsplit with maxsplit i leaves a prefix that grows as i shrinks
            modname = fullmodname.rsplit('.', i)[0]
            try:
                modref = modules[modname]
            except KeyError:
                continue
            if hasattr(modref, 'paramstyle') and hasattr(modref, 'connect'):
                self.__underlyingmodule = modref
                return modref

        return None  # We could not find the module. Raise an Exception instead?

    def commit(self):
        """Commit the transaction."""
        endload()
        self.__connection.commit()

    def close(self):
        """Close the connection to the database.

           If this ConnectionWrapper is the default, the default is unset.
        """
        global _defaulttargetconnection
        if _defaulttargetconnection is self:
            _defaulttargetconnection = None
        self.__connection.close()

    def rollback(self):
        """Rollback the transaction."""
        self.__connection.rollback()

    def setasdefault(self):
        """Set this ConnectionWrapper as the default connection."""
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        """Return a cursor object. Optional method."""
        return self.__connection.cursor()

    def resultnames(self):
        """Return a tuple with the result's column names (or None)."""
        if self.__cursor.description is None:
            return None
        return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the ConnectionWrapper is pickled (to be sent to another
        # process), we need to create a new cursor when it is unpickled.
        res = self.__dict__.copy()
        del res['_ConnectionWrapper__cursor']  # a dirty trick, but...
        return res

    def __setstate__(self, dictdata):
        self.__dict__.update(dictdata)
        self.__cursor = self.__connection.cursor()
Esempio n. 15
0
class ConnectionWrapper(object):
    """Provide a uniform representation of different database connection types.

       All Dimensions and FactTables communicate with the data warehouse using
       a ConnectionWrapper. In this way, the code for loading the DW does not
       have to care about which parameter format is used.

       pygrametl's code uses the 'pyformat' but the ConnectionWrapper performs
       translations of the SQL to use 'named', 'qmark', 'format', or 'numeric'
       if the user's database connection needs this. Note that the
       translations are simple and naive. Escaping as in %%(name)s is not
       taken into consideration. These simple translations are enough for
       pygrametl's code which is the important thing here; we're not trying to
       make a generic, all-purpose tool to get rid of the problems with
       different parameter formats. It is, however, possible to disable the
       translation of a statement to execute such that 'problematic'
       statements can be executed anyway.
    """
    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Create a ConnectionWrapper around the given PEP 249 connection

           If no default ConnectionWrapper already exists, the new
           ConnectionWrapper is set as the default.

           Arguments:
           - connection: An open PEP 249 connection to the database
           - stmtcachesize: A number deciding how many translated statements to
             cache. A statement needs to be translated when the connection
             does not use 'pyformat' to specify parameters. When 'pyformat' is
             used, stmtcachesize is ignored as no statements need to be
             translated.
           - paramstyle: A string holding the name of the PEP 249 connection's
             paramstyle. If None, pygrametl will try to find the paramstyle
             automatically (an AttributeError can be raised if that fails).
        """
        global _defaulttargetconnection

        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        if paramstyle is None:
            modname = self.__connection.__class__.__module__
            try:
                paramstyle = modules[modname].paramstyle
            except AttributeError:
                # Note: This is probably a better way to do this, but to avoid
                # to break anything that worked before this fix, we only do it
                # this way if the first approach didn't work
                try:
                    paramstyle = modules[modname.split('.')[0]].paramstyle
                except AttributeError:
                    # To support, e.g., mysql.connector connections
                    paramstyle = modules[modname.rsplit('.', 1)[0]].paramstyle

        if paramstyle != 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            self.__translate = None

        if _defaulttargetconnection is None:
            _defaulttargetconnection = self

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        """Execute a statement.

           Arguments:
           - stmt: the statement to execute
           - arguments: a mapping with the arguments (default: None)
           - namemapping: a mapping of names such that if stmt uses %(arg)s
             and namemapping[arg]=arg2, the value arguments[arg2] is used
             instead of arguments[arg]
           - translate: decides if translation from 'pyformat' to the
             underlying connection's format should take place. Default: True
        """
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)
        if arguments is None:
            # Some drivers don't accept None for 'arguments'
            self.__cursor.execute(stmt)
        else:
            self.__cursor.execute(stmt, arguments)

    def executemany(self, stmt, params, translate=True):
        """Execute a statement once for each parameter set in params.

           Arguments:
           - stmt: the statement to execute
           - params: a sequence of mappings with the arguments
           - translate: decides if translation from 'pyformat' to the
             underlying connection's format should take place. Default: True

           If translation is to be done and params is empty, nothing is
           executed as there is no parameter set to translate for.
        """
        if not (self.__translate and translate):
            # for pyformat when no translation is necessary
            self.__cursor.executemany(stmt, params)
            return
        if len(params) == 0:
            # params[0] below would raise an IndexError
            return
        # Idea: Translate the statement for the first parameter set. Then
        # reuse the statement (but create new attribute sequences if
        # needed) for the remaining parameter sets
        newstmt = self.__translate(stmt, params[0])[0]
        cached = self.__translations[stmt]
        if isinstance(cached, tuple):
            # We need to extract attributes and put them into sequences.
            # The attributes to extract are cached with the statement
            names = cached[1]
            newparams = [[p[n] for n in names] for p in params]
            self.__cursor.executemany(newstmt, newparams)
        else:
            # Only the translated SQL is cached. The paramstyle is 'named'
            # in this case and we don't have to put parameters into sequences
            self.__cursor.executemany(newstmt, params)

    def _translate2named(self, stmt, row=None):
        """Translate %(name)s to :name and return (SQL, row).

           There is no need to change row. Only the translated SQL is
           cached.
        """
        res = self.__translations.get(stmt, None)
        if res:
            return (res, row)
        res = stmt
        while True:
            start = res.find('%(')
            if start == -1:
                break
            end = res.find(')s', start)
            if end == -1:
                break
            name = res[start + 2:end]
            # All occurrences of this name are replaced at once
            res = res.replace(res[start:end + 2], ':' + name)
        self.__translations[stmt] = res
        return (res, row)

    def __translatepositional(self, stmt, row, marker):
        # Replace each %(name)s in stmt by marker (one occurrence at a time)
        # and build a list of the attributes to extract from row, in order
        # of occurrence. Both the new statement and the list are cached.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if not newstmt:
            names = []
            newstmt = stmt
            while True:
                start = newstmt.find('%(')
                if start == -1:
                    break
                end = newstmt.find(')s', start)
                if end == -1:
                    break
                names.append(newstmt[start + 2:end])
                newstmt = newstmt.replace(newstmt[start:end + 2], marker,
                                          1)  # Replace once!
            self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2qmark(self, stmt, row=None):
        """Translate %(name)s to ? and return (SQL, list of values).

           The values are extracted from row in the order the names occur
           in stmt. The translated SQL and the names are cached.
        """
        return self.__translatepositional(stmt, row, '?')

    def _translate2numeric(self, stmt, row=None):
        """Translate %(name)s to :0, :1, ... and return (SQL, values).

           All occurrences of a name get the same number and the name's
           value is only extracted once from row. The translated SQL and
           the names are cached.
        """
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break
            name = newstmt[start + 2:end]
            names.append(name)
            newstmt = newstmt.replace(newstmt[start:end + 2], ':' + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        """Translate %(name)s to %s and return (SQL, list of values).

           The values are extracted from row in the order the names occur
           in stmt. The translated SQL and the names are cached.
        """
        return self.__translatepositional(stmt, row, '%s')

    def rowfactory(self, names=None):
        """Return a generator object returning result rows (i.e. dicts).

           The current cursor is handed over to the generator and the
           ConnectionWrapper gets a new cursor for later statements.
        """
        rows = self.__cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        """Return one result row (i.e. dict).

           An empty dict is returned if there is no result set. If there
           are no more rows, a dict mapping each name to None is returned.
        """
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [
                self.nametranslator(t[0]) for t in self.__cursor.description
            ]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict.fromkeys(names)
        return dict(zip(names, values))

    def fetchonetuple(self):
        """Return one result tuple.

           An empty tuple is returned if there is no result set. If there
           are no more rows, a tuple with a None for each column is returned.
        """
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None, ) * len(self.__cursor.description)
        return values

    def fetchmanytuples(self, cnt):
        """Return cnt result tuples."""
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        """Return all result tuples (a generator fetching 200 at a time)."""
        if self.__cursor.description is not None:
            while True:
                results = self.__cursor.fetchmany(200)
                if not results:
                    break
                for row in results:
                    yield row

    def rowcount(self):
        """Return the size of the result."""
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        """Return a reference to the underlying connection's module."""
        return modules[self.__connection.__class__.__module__]

    def commit(self):
        """Commit the transaction."""
        endload()
        self.__connection.commit()

    def close(self):
        """Close the connection to the database."""
        self.__connection.close()

    def rollback(self):
        """Rollback the transaction."""
        self.__connection.rollback()

    def setasdefault(self):
        """Set this ConnectionWrapper as the default connection."""
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        """Return a cursor object. Optional method."""
        return self.__connection.cursor()

    def resultnames(self):
        """Return a tuple with the result's column names (or None)."""
        if self.__cursor.description is None:
            return None
        return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the ConnectionWrapper is pickled (to be sent to another
        # process), we need to create a new cursor when it is unpickled.
        res = self.__dict__.copy()
        del res['_ConnectionWrapper__cursor']  # a dirty trick, but...
        return res

    def __setstate__(self, dictdata):
        self.__dict__.update(dictdata)
        self.__cursor = self.__connection.cursor()
Esempio n. 16
0
class BackgroundConnectionWrapper(object):

    """An alternative implementation of the ConnectionWrapper for experiments.
       This implementation communicates with the database by using a
       separate thread.

       It is likely better to use ConnectionWrapper or a shared
       ConnectionWrapper (see pygrametl.parallel).

       This class offers the same methods as ConnectionWrapper. The
       documentation is not repeated here.
    """

    _SINGLE = 1
    _MANY = 2

    # Most of this class' code was just copied from ConnectionWrapper
    # as we just want to do experiments with this class.

    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        if paramstyle is None:
            modname = self.__connection.__class__.__module__
            try:
                paramstyle = modules[modname].paramstyle
            except AttributeError:
                # Note: This is probably a better way to do this, but to avoid
                # to break anything that worked before this fix, we only do it
                # this way if the first approach didn't work
                try:
                    paramstyle = modules[modname.split(".")[0]].paramstyle
                except AttributeError:
                    # To support, e.g., mysql.connector connections
                    paramstyle = modules[modname.rsplit(".", 1)[0]].paramstyle

        if paramstyle != "pyformat":
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, "_translate2" + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" % paramstyle)
        else:
            self.__translate = None

        # Thread-stuff: statements are put on the queue and executed by a
        # daemon thread running __worker
        self.__queue = Queue(5000)
        t = Thread(target=self.__worker)
        t.daemon = True
        t.start()

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)
        self.__queue.put((self._SINGLE, self.__cursor, stmt, arguments))

    def executemany(self, stmt, params, translate=True):
        if not (self.__translate and translate):
            # for pyformat when no translation is necessary
            self.__queue.put((self._MANY, self.__cursor, stmt, params))
            return
        if len(params) == 0:
            # Nothing to translate for; params[0] below would raise an
            # IndexError
            return
        # Idea: Translate the statement for the first parameter set. Then
        # reuse the statement (but create new attribute sequences if needed)
        # for the remaining parameter sets
        newstmt = self.__translate(stmt, params[0])[0]
        cached = self.__translations[stmt]
        if isinstance(cached, tuple):
            # We need to extract attributes and put them into sequences.
            # The attributes to extract are cached with the statement
            names = cached[1]
            newparams = [[p[n] for n in names] for p in params]
            self.__queue.put((self._MANY, self.__cursor, newstmt, newparams))
        else:
            # Only the translated SQL is cached. The paramstyle is 'named'
            # in this case and we don't have to put parameters into sequences
            self.__queue.put((self._MANY, self.__cursor, newstmt, params))

    def _translate2named(self, stmt, row=None):
        # Translate %(name)s to :name. No need to change row.
        # Cache only the translated SQL.
        res = self.__translations.get(stmt, None)
        if res:
            return (res, row)
        res = stmt
        while True:
            start = res.find("%(")
            if start == -1:
                break
            end = res.find(")s", start)
            if end == -1:
                # An unterminated %( would otherwise never be consumed and
                # the loop would not end
                break
            name = res[start + 2 : end]
            res = res.replace(res[start : end + 2], ":" + name)
        self.__translations[stmt] = res
        return (res, row)

    def __translatepositional(self, stmt, row, marker):
        # Replace each %(name)s in stmt by marker (one occurrence at a time)
        # and build a list of the attributes to extract from row, in order
        # of occurrence. Both the new statement and the list are cached.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if not newstmt:
            names = []
            newstmt = stmt
            while True:
                start = newstmt.find("%(")
                if start == -1:
                    break
                end = newstmt.find(")s", start)
                if end == -1:
                    # An unterminated %( would otherwise never be consumed
                    # and the loop would not end
                    break
                names.append(newstmt[start + 2 : end])
                newstmt = newstmt.replace(newstmt[start : end + 2], marker, 1)  # Replace once!
            self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2qmark(self, stmt, row=None):
        # Translate %(name)s to ? and build a list of attributes to extract
        # from row. Cache both.
        return self.__translatepositional(stmt, row, "?")

    def _translate2numeric(self, stmt, row=None):
        # Translate %(name)s to :0, :1, ... and build a list of attributes to
        # extract from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find("%(")
            if start == -1:
                break
            end = newstmt.find(")s", start)
            if end == -1:
                # An unterminated %( would otherwise never be consumed and
                # the loop would not end
                break
            name = newstmt[start + 2 : end]
            names.append(name)
            newstmt = newstmt.replace(newstmt[start : end + 2], ":" + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        # Translate %(name)s to %s and build a list of attributes to extract
        # from row. Cache both.
        return self.__translatepositional(stmt, row, "%s")

    def rowfactory(self, names=None):
        self.__queue.join()
        rows = self.__cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        self.__queue.join()
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [self.nametranslator(t[0]) for t in self.__cursor.description]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict.fromkeys(names)
        return dict(zip(names, values))

    def fetchonetuple(self):
        self.__queue.join()
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None,) * len(self.__cursor.description)
        return values

    def fetchmanytuples(self, cnt):
        self.__queue.join()
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        self.__queue.join()
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchall()

    def rowcount(self):
        self.__queue.join()
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        # No need to join the queue here
        return modules[self.__connection.__class__.__module__]

    def commit(self):
        endload()
        self.__queue.join()
        self.__connection.commit()

    def close(self):
        self.__queue.join()
        self.__connection.close()

    def rollback(self):
        self.__queue.join()
        self.__connection.rollback()

    def setasdefault(self):
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        self.__queue.join()
        return self.__connection.cursor()

    def resultnames(self):
        self.__queue.join()
        if self.__cursor.description is None:
            return None
        return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the wrapper is pickled (to be sent to another process), we
        # need to create a new cursor when it is unpickled. The cursor is
        # stored under the name mangled with THIS class' name.
        # NOTE(review): the queue is still part of the state; confirm that
        # pickling this class is actually needed and works with it.
        res = self.__dict__.copy()
        res.pop("_BackgroundConnectionWrapper__cursor", None)
        return res

    def __setstate__(self, dict):
        self.__dict__.update(dict)
        self.__cursor = self.__connection.cursor()

    def __worker(self):
        # Executes the queued operations forever. Each item is marked as
        # done after its execution such that join() on the queue returns.
        # NOTE(review): if a statement raises, this loop ends before
        # task_done() is called and later join() calls will block.
        while True:
            (op, curs, stmt, args) = self.__queue.get()
            if op == self._SINGLE:
                if args is None:
                    # Some drivers don't accept None for the arguments
                    curs.execute(stmt)
                else:
                    curs.execute(stmt, args)
            elif op == self._MANY:
                curs.executemany(stmt, args)
            self.__queue.task_done()
Esempio n. 17
0
class BackgroundConnectionWrapper(object):

    """An alternative implementation of the ConnectionWrapper for experiments.
       This implementation communicates with the database by using a
       separate thread.

       It is likely better to use ConnectionWrapper or a shared
       ConnectionWrapper (see pygrametl.parallel).

       This class offers the same methods as ConnectionWrapper. The
       documentation is not repeated here.

       Statements given to execute and executemany are only put on a queue
       and are then run by the worker thread. Methods that depend on the
       outcome wait until the queue has been emptied. If a queued statement
       raised an exception, the statements queued after it are skipped and
       the exception is re-raised in the calling thread by the next method
       that waits for the queue.
    """
    _SINGLE = 1
    _MANY = 2
    _QUEUESIZE = 5000

    # The first exception raised by a queued statement and not yet reported
    # to the caller. Kept on the class such that the attribute also exists
    # for instances that were created without __init__.
    __error = None

    # Most of this class' code was just copied from ConnectionWrapper
    # as we just want to do experiments with this class.

    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        self.__connection = connection
        self.nametranslator = lambda s: s

        self.__underlyingmodule = None  # will be updated next
        self.getunderlyingmodule()  # updates self.__underlyingmodule

        if paramstyle is None:
            # An AttributeError is raised here if no module was found
            paramstyle = self.__underlyingmodule.paramstyle

        if paramstyle != 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            self.__translate = None

        # Thread-stuff (this also creates the one and only cursor)
        self.__startworker()

    def __startworker(self):
        # Create the cursor and the queue and start the thread that runs the
        # queued statements. Used both by __init__ and by __setstate__.
        self.__cursor = self.__connection.cursor()
        self.__queue = Queue(self._QUEUESIZE)
        t = Thread(target=self.__worker)
        t.daemon = True
        t.start()

    def __sync(self):
        # Wait until all queued statements have been handled and re-raise
        # (once) an exception that one of them raised in the worker thread.
        self.__queue.join()
        error = self.__error
        if error is not None:
            self.__error = None
            raise error

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)
        self.__queue.put((self._SINGLE, self.__cursor, stmt, arguments))

    def executemany(self, stmt, params, translate=True):
        if self.__translate and translate:
            if not isinstance(params, (list, tuple)):
                # We must index and possibly iterate more than once
                params = list(params)
            if not params:
                return  # Nothing to execute (and no row to translate with)
            # Idea: Translate the statement for the first parameter set. Then
            # reuse the statement (but create new attribute sequences if
            # needed) for the remaining parameter sets
            newstmt = self.__translate(stmt, params[0])[0]
            cached = self.__translations[stmt]
            if isinstance(cached, tuple):
                # We need to extract attributes and put them into sequences.
                # The cached tuple holds the attributes to extract.
                names = cached[1]
                params = [[p[n] for n in names] for p in params]
            # Otherwise the paramstyle is 'named' (only the translated SQL is
            # cached) and we don't have to put parameters into sequences
            self.__queue.put((self._MANY, self.__cursor, newstmt, params))
        else:
            # for pyformat when no translation is necessary
            self.__queue.put((self._MANY, self.__cursor, stmt, params))

    def _translate2named(self, stmt, row=None):
        # Translate %(name)s to :name. No need to change row.
        # Cache only the translated SQL.
        res = self.__translations.get(stmt, None)
        if res:
            return (res, row)
        res = stmt
        while True:
            start = res.find('%(')
            if start == -1:
                break
            end = res.find(')s', start)
            if end == -1:
                break  # Unterminated '%('. Stop instead of looping forever
            name = res[start + 2: end]
            res = res.replace(res[start:end + 2], ':' + name)
        self.__translations[stmt] = res
        return (res, row)

    def _translate2qmark(self, stmt, row=None):
        # Translate %(name)s to ? and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # Unterminated '%('. Stop instead of looping forever
            name = newstmt[start + 2: end]
            names.append(name)
            newstmt = newstmt.replace(
                newstmt[start:end + 2], '?', 1)  # Replace once!
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2numeric(self, stmt, row=None):
        # Translate %(name)s to :0, :1, ... and build a list of attributes to
        # extract from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # Unterminated '%('. Stop instead of looping forever
            name = newstmt[start + 2: end]
            names.append(name)
            # All occurrences of the same name get the same number
            newstmt = newstmt.replace(newstmt[start:end + 2], ':' + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        # Translate %(name)s to %s and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # Unterminated '%('. Stop instead of looping forever
            name = newstmt[start + 2: end]
            names.append(name)
            newstmt = newstmt.replace(
                newstmt[start:end + 2], '%s', 1)  # Replace once!
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def rowfactory(self, names=None):
        self.__sync()
        # Hand the current cursor over to the generator and continue with a
        # new cursor such that later statements don't disturb the result
        rows = self.__cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        self.__sync()
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [self.nametranslator(t[0])
                     for t in self.__cursor.description]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict.fromkeys(names)
        else:
            return dict(zip(names, values))

    def fetchonetuple(self):
        self.__sync()
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None, ) * len(self.__cursor.description)
        else:
            return values

    def fetchmanytuples(self, cnt):
        self.__sync()
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        # This is a generator: the queue is not waited for before the first
        # tuple is requested
        self.__sync()
        if self.__cursor.description is not None:
            while True:
                results = self.__cursor.fetchmany(200)
                if not results:
                    break
                for row in results:
                    yield row

    def rowcount(self):
        self.__sync()
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        """Return a reference to the underlying connection's module.

           This is done by considering the prefixes of the connection's
           __class__'s __module__ string, the shortest first (e.g., 'a',
           'a.b', 'a.b.c'), and returning the first of these modules that
           has both the attributes 'paramstyle' and 'connect'. The result is
           remembered for later calls. None is returned if no such module is
           found.
        """
        # No need to join the queue here
        if self.__underlyingmodule is not None:
            return self.__underlyingmodule

        fullmodname = self.__connection.__class__.__module__
        for i in reversed(range(fullmodname.count('.') + 1)):
            modname = fullmodname.rsplit('.', i)[0]
            try:
                modref = modules[modname]
            except KeyError:
                continue
            if hasattr(modref, 'paramstyle') and hasattr(modref, 'connect'):
                self.__underlyingmodule = modref
                return modref

        return None  # We could not find the module. Raise an Exception instead?

    def commit(self):
        endload()
        self.__sync()
        self.__connection.commit()

    def close(self):
        try:
            self.__sync()
        finally:
            # Close the connection even if a queued statement failed
            self.__connection.close()

    def rollback(self):
        try:
            self.__sync()
        finally:
            # Roll back even if a queued statement failed
            self.__connection.rollback()

    def setasdefault(self):
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        self.__sync()
        return self.__connection.cursor()

    def resultnames(self):
        self.__sync()
        if self.__cursor.description is None:
            return None
        else:
            return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the wrapper is pickled (to be sent to another process), we
        # need to create a new cursor, a new queue and a new worker thread
        # when it is unpickled. The queue holds thread locks and cannot be
        # pickled. Note that the attribute names are mangled with the name of
        # THIS class.
        res = self.__dict__.copy()
        res.pop('_BackgroundConnectionWrapper__cursor', None)
        res.pop('_BackgroundConnectionWrapper__queue', None)
        return res

    def __setstate__(self, dictdata):
        self.__dict__.update(dictdata)
        self.__startworker()

    def __worker(self):
        # Runs in the background thread: execute the queued statements one
        # by one. task_done is always called such that a join on the queue
        # can never block forever because of a failing statement.
        while True:
            (op, curs, stmt, args) = self.__queue.get()
            try:
                if self.__error is not None:
                    # An earlier statement failed and the caller has not
                    # been told yet. Don't continue as if nothing happened.
                    pass
                elif op == self._SINGLE:
                    if args is None:
                        # Some drivers don't accept None for the arguments
                        curs.execute(stmt)
                    else:
                        curs.execute(stmt, args)
                elif op == self._MANY:
                    curs.executemany(stmt, args)
            except Exception as exc:
                # Keep the thread alive; __sync re-raises in the caller
                self.__error = exc
            finally:
                self.__queue.task_done()
Esempio n. 18
0
class ConnectionWrapper(object):

    """Provide a uniform representation of different database connection types.

       All Dimensions and FactTables communicate with the data warehouse using
       a ConnectionWrapper. In this way, the code for loading the DW does not
       have to care about which parameter format is used.

       pygrametl's code uses the 'pyformat' but the ConnectionWrapper performs
       translations of the SQL to use 'named', 'qmark', 'format', or 'numeric'
       if the user's database connection needs this. Note that the
       translations are simple and naive. Escaping as in %%(name)s is not
       taken into consideration. These simple translations are enough for
       pygrametl's code which is the important thing here; we're not trying to
       make a generic, all-purpose tool to get rid of the problems with
       different parameter formats. It is, however, possible to disable the
       translation of a statement to execute such that 'problematic'
       statements can be executed anyway.
    """

    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Create a ConnectionWrapper around the given PEP 249 connection

           If no default ConnectionWrapper already exists, the new
           ConnectionWrapper is set as the default.

           Arguments:
           - connection: An open PEP 249 connection to the database
           - stmtcachesize: A number deciding how many translated statements to
             cache. A statement needs to be translated when the connection
             does not use 'pyformat' to specify parameters. When 'pyformat' is
             used, stmtcachesize is ignored as no statements need to be
             translated.
           - paramstyle: A string holding the name of the PEP 249 connection's
             paramstyle. If None, pygrametl will try to find the paramstyle
             automatically (an AttributeError can be raised if that fails).
        """
        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        self.__underlyingmodule = None  # will be updated next
        self.getunderlyingmodule()  # updates self.__underlyingmodule

        if paramstyle is None:
            # An AttributeError is raised here if no module was found
            paramstyle = self.__underlyingmodule.paramstyle

        if paramstyle != 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            self.__translate = None

        global _defaulttargetconnection
        if _defaulttargetconnection is None:
            _defaulttargetconnection = self

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        """Execute a statement.

           Arguments:
           - stmt: the statement to execute
           - arguments: a mapping with the arguments (default: None)
           - namemapping: a mapping of names such that if stmt uses %(arg)s
             and namemapping[arg]=arg2, the value arguments[arg2] is used
             instead of arguments[arg]
           - translate: decides if translation from 'pyformat' to the
             underlying connection's format should take place. Default: True
        """
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)

        if arguments is None:
            # Some drivers don't accept None for 'arguments'
            self.__cursor.execute(stmt)
        else:
            self.__cursor.execute(stmt, arguments)

    def executemany(self, stmt, params, translate=True):
        """Execute a statement once for each of the given parameter sets.

           Arguments:
           - stmt: the statement to execute
           - params: the parameter sets (mappings). If the statement must be
             translated, any iterable is accepted and nothing is done if it
             is empty.
           - translate: decides if translation from 'pyformat' to the
             underlying connection's format should take place. Default: True
        """
        if self.__translate and translate:
            if not isinstance(params, (list, tuple)):
                # We must index and possibly iterate more than once
                params = list(params)
            if not params:
                return  # Nothing to execute (and no row to translate with)
            # Idea: Translate the statement for the first parameter set. Then
            # reuse the statement (but create new attribute sequences if
            # needed) for the remaining parameter sets
            newstmt = self.__translate(stmt, params[0])[0]
            cached = self.__translations[stmt]
            if isinstance(cached, tuple):
                # We need to extract attributes and put them into sequences.
                # The cached tuple holds the attributes to extract.
                names = cached[1]
                params = [[p[n] for n in names] for p in params]
            # Otherwise the paramstyle is 'named' (only the translated SQL is
            # cached) and we don't have to put parameters into sequences
            self.__cursor.executemany(newstmt, params)
        else:
            # for pyformat when no translation is necessary
            self.__cursor.executemany(stmt, params)

    def _translate2named(self, stmt, row=None):
        # Translate %(name)s to :name. No need to change row.
        # Cache only the translated SQL.
        res = self.__translations.get(stmt, None)
        if res:
            return (res, row)
        res = stmt
        while True:
            start = res.find('%(')
            if start == -1:
                break
            end = res.find(')s', start)
            if end == -1:
                break  # Unterminated '%('. Leave the rest untouched
            name = res[start + 2: end]
            res = res.replace(res[start:end + 2], ':' + name)
        self.__translations[stmt] = res
        return (res, row)

    def _translate2qmark(self, stmt, row=None):
        # Translate %(name)s to ? and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # Unterminated '%('. Leave the rest untouched
            name = newstmt[start + 2: end]
            names.append(name)
            newstmt = newstmt.replace(
                newstmt[start:end + 2], '?', 1)  # Replace once!
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2numeric(self, stmt, row=None):
        # Translate %(name)s to :0, :1, ... and build a list of attributes to
        # extract from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # Unterminated '%('. Leave the rest untouched
            name = newstmt[start + 2: end]
            names.append(name)
            # All occurrences of the same name get the same number
            newstmt = newstmt.replace(newstmt[start:end + 2], ':' + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        # Translate %(name)s to %s and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # Unterminated '%('. Leave the rest untouched
            name = newstmt[start + 2: end]
            names.append(name)
            newstmt = newstmt.replace(
                newstmt[start:end + 2], '%s', 1)  # Replace once!
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def rowfactory(self, names=None):
        """Return a generator object returning result rows (i.e. dicts)."""
        # Hand the current cursor over to the generator and continue with a
        # new cursor such that later statements don't disturb the result
        rows = self.__cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        """Return one result row (i.e. dict)."""
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [self.nametranslator(t[0])
                     for t in self.__cursor.description]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict.fromkeys(names)
        else:
            return dict(zip(names, values))

    def fetchonetuple(self):
        """Return one result tuple."""
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None, ) * len(self.__cursor.description)
        else:
            return values

    def fetchmanytuples(self, cnt):
        """Return cnt result tuples."""
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        """Return all result tuples"""
        # This is a generator which fetches the tuples in chunks of 200
        if self.__cursor.description is not None:
            while True:
                results = self.__cursor.fetchmany(200)
                if not results:
                    break
                for row in results:
                    yield row

    def rowcount(self):
        """Return the size of the result."""
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        """Return a reference to the underlying connection's module.

           This is done by considering the prefixes of the connection's
           __class__'s __module__ string, the shortest first (e.g., 'a',
           'a.b', 'a.b.c'), and returning the first of these modules that
           has both the attributes 'paramstyle' and 'connect'. The result is
           remembered for later calls. None is returned if no such module is
           found.
        """
        if self.__underlyingmodule is not None:
            return self.__underlyingmodule

        fullmodname = self.__connection.__class__.__module__
        for i in reversed(range(fullmodname.count('.') + 1)):
            modname = fullmodname.rsplit('.', i)[0]
            try:
                modref = modules[modname]
            except KeyError:
                continue
            if hasattr(modref, 'paramstyle') and hasattr(modref, 'connect'):
                self.__underlyingmodule = modref
                return modref

        return None  # We could not find the module. Raise an Exception instead?

    def commit(self):
        """Commit the transaction."""
        endload()
        self.__connection.commit()

    def close(self):
        """Close the connection to the database."""
        self.__connection.close()

    def rollback(self):
        """Rollback the transaction."""
        self.__connection.rollback()

    def setasdefault(self):
        """Set this ConnectionWrapper as the default connection."""
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        """Return a cursor object. Optional method."""
        return self.__connection.cursor()

    def resultnames(self):
        """Return the names from the cursor's description as a tuple.

           None is returned if the cursor has no description.
        """
        if self.__cursor.description is None:
            return None
        else:
            return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the ConnectionWrapper is pickled (to be sent to another
        # process), we need to create a new cursor when it is unpickled.
        res = self.__dict__.copy()
        del res['_ConnectionWrapper__cursor']  # a dirty trick, but...
        return res

    def __setstate__(self, dictdata):
        self.__dict__.update(dictdata)
        self.__cursor = self.__connection.cursor()