Example #1
0
    def __init__(self, url, table_name="store", idfactory=None,
                 extra_fields=None, create=True, **extra_args):
        """
        Set up a store backed by the SQL database identified by `url`.

        The DB backend (MySQL, psql, sqlite3) is chosen based on the
        `url.scheme` value.  Only the constructor arguments are
        recorded here; the slots for the lazily-initialized attributes
        all start out as `None`.

        Any additional keyword arguments are accepted and ignored.
        """
        super(SqlStore, self).__init__(url)

        # public attributes, usable right away
        self.idfactory = idfactory or IdFactory(id_class=IntId)
        self.table_name = table_name

        # constructor arguments kept aside for the lazy initialization
        self._init_extra_fields = {} if extra_fields is None else extra_fields
        self._init_create = create

        # `None` marks a lazily-initialized attribute as "not set up yet"
        self._real_engine = None
        self._real_extra_fields = None
        self._real_tables = None
Example #2
0
    def test_new_item(self):
        """IDs handed out by a default `IdFactory` must all be distinct."""
        factory = IdFactory()
        obj = DummyObject()
        generated = [factory.new(obj) for _ in range(10)]
        assert len(generated) == len(set(generated))

        # `reserve` is called only to verify that invoking it raises no error
        factory.reserve(5)
Example #3
0
    def test_new_item(self):
        """Check that ten consecutive `IdFactory.new` calls never repeat an ID."""
        factory = IdFactory()
        target = DummyObject()

        new_ids = []
        for _ in range(10):
            new_ids.append(factory.new(target))
        unique_ids = set(new_ids)
        assert len(new_ids) == len(unique_ids)

        # The call to `reserve` is here just to make sure that calling
        # it does not result in an error.
        factory.reserve(5)
Example #4
0
    def __init__(self,
                 url,
                 table_name=None,
                 idfactory=None,
                 extra_fields=None,
                 create=True,
                 **extra_args):
        """
        Set up a store on the database identified by `url`.

        DB backend (MySQL, psql, sqlite3) is chosen based on the
        `url.scheme` value.  Nothing but argument bookkeeping happens
        here: the slots for the lazily-initialized attributes are all
        set to `None`.

        :param url: location of the database; its "fragment" part may
          carry a ``table=...`` setting naming the DB table to use
          (if given more than once, the last occurrence wins).
        :param table_name: name of the DB table.  When `None`, the
          name given in the URL fragment is used, falling back to
          ``store``; an explicit value takes precedence over the URL.
        :param idfactory: object used to generate new IDs; a false
          value selects ``IdFactory(id_class=IntId)``.
        :param extra_fields: mapping saved for the lazy
          initialization; `None` is turned into an empty dict.
        :param create: flag saved for the lazy initialization.
        :param extra_args: accepted and ignored.
        """
        super(SqlStore, self).__init__(url)
        if self.url.fragment:
            kv = parse_qs(self.url.fragment)
        else:
            kv = {}

        # init static public args
        if not idfactory:
            self.idfactory = IdFactory(id_class=IntId)
        else:
            self.idfactory = idfactory

        url_table_names = kv.get('table')
        if url_table_names:
            url_table_name = url_table_names[-1]  # last wins
        else:
            url_table_name = ''
        if table_name is None:
            self.table_name = url_table_name or "store"
        else:
            # Report an override only if the URL actually named a
            # (different) table; otherwise the message would be bogus.
            if url_table_name and table_name != url_table_name:
                gc3libs.log.debug(
                    "DB table name given in store URL fragment,"
                    " but overriden by `table` argument to SqlStore()")
            self.table_name = table_name

        # save ctor args for lazy-initialization
        self._init_extra_fields = (extra_fields
                                   if extra_fields is not None else {})
        self._init_create = create

        # create slots for lazy-init'ed attrs
        self._real_engine = None
        self._real_extra_fields = None
        self._real_tables = None
Example #5
0
    def __init__(self,
                 url,
                 table_name="store",
                 idfactory=None,
                 extra_fields=None,
                 create=True,
                 **extra_args):
        """
        Prepare a store on the database identified by `url`.

        Which DB backend is used (MySQL, psql, sqlite3) depends on the
        `url.scheme` value.  The constructor only records its
        arguments and resets the lazily-initialized slots to `None`.

        Extra keyword arguments are accepted and ignored.
        """
        super(SqlStore, self).__init__(url)

        # static public attributes
        self.idfactory = (idfactory if idfactory
                          else IdFactory(id_class=IntId))
        self.table_name = table_name

        # remember ctor args until lazy initialization runs
        if extra_fields is None:
            extra_fields = {}
        self._init_extra_fields = extra_fields
        self._init_create = create

        # slots for attributes that are initialized lazily
        self._real_engine = None
        self._real_extra_fields = None
        self._real_tables = None
Example #6
0
    def test_custom_next_id(self):
        """A user-supplied `next_id_fn` must drive the numbering of new IDs."""
        class next_id(object):
            # callable returning 0, 1, 2, ... on successive invocations
            def __init__(self):
                self.curid = -1

            def __call__(self):
                self.curid += 1
                return self.curid

        factory = IdFactory(next_id_fn=next_id())
        obj = DummyObject()
        generated = [factory.new(obj) for _ in range(10)]

        # no ID may be handed out twice
        assert len(generated) == len(set(generated))

        # each ID must compare equal to the class name plus sequence number
        for seqno, new_id in enumerate(generated):
            assert new_id == "DummyObject.%d" % seqno
Example #7
0
    def __init__(self,
                 url,
                 table_name="store",
                 idfactory=None,
                 extra_fields=None,
                 create=True,
                 **extra_args):
        """
        Open a connection to the storage database identified by
        url. It will use the correct backend (MySQL, psql, sqlite3)
        based on the url.scheme value

        :param url: database location; ``str(url)`` is handed to
          `sqla.create_engine`.
        :param table_name: name of the table holding the stored objects.
        :param idfactory: object used to generate new IDs; a false
          value selects ``IdFactory(id_class=IntId)``.
        :param extra_fields: mapping of `sqla.Column` objects to
          functions; a copy of each column is appended to the table
          and the function is recorded under the column name.
          `None` (the default) means no extra columns.
        :param create: if true, create the table when the database
          does not already have one named `table_name`.
        :param extra_args: accepted and ignored.
        """
        # `None` replaces the former `{}` default: a mutable default
        # value is a single object shared by all calls.
        if extra_fields is None:
            extra_fields = {}

        self._engine = sqla.create_engine(str(url))
        self.table_name = table_name

        self.__meta = sqla.MetaData(bind=self._engine)

        # create schema
        table = sqla.Table(
            self.table_name, self.__meta,
            sqla.Column('id', sqla.Integer(),
                        primary_key=True, nullable=False),
            sqla.Column('data', sqla.LargeBinary()),
            sqla.Column('state', sqla.String(length=128)))

        # create internal rep of table
        self.extra_fields = dict()
        for col, func in extra_fields.iteritems():
            assert isinstance(col, sqla.Column)
            table.append_column(col.copy())
            self.extra_fields[col.name] = func

        # check if the db exists and already has a `table_name` table
        current_metadata = sqla.MetaData(bind=self._engine)
        current_metadata.reflect()
        if create and self.table_name not in current_metadata.tables:
            self.__meta.create_all()

        self.t_store = self.__meta.tables[self.table_name]

        self.idfactory = idfactory
        if not idfactory:
            self.idfactory = IdFactory(id_class=IntId)
Example #8
0
    def test_custom_next_id(self):
        """Check that `IdFactory` numbers IDs using the given `next_id_fn`."""
        class next_id(object):
            # stateful callable: first call returns 0, then 1, 2, ...
            def __init__(self):
                self.curid = -1

            def __call__(self):
                self.curid += 1
                return self.curid

        counter = next_id()
        factory = IdFactory(next_id_fn=counter)

        target = DummyObject()
        new_ids = []
        for _ in range(10):
            new_ids.append(factory.new(target))

        # all generated IDs are distinct ...
        assert len(new_ids) == len(set(new_ids))

        # ... and each compares equal to "DummyObject.<position>"
        for position, new_id in enumerate(new_ids):
            assert new_id == "DummyObject.%d" % position
Example #9
0
    def __init__(self,
                 directory=gc3libs.Default.JOBS_DIR,
                 idfactory=IdFactory(),
                 protocol=DEFAULT_PROTOCOL,
                 **extra_args):
        """
        Record the directory, ID factory and protocol used by this store.

        `directory` may be a plain path or a `gc3libs.url.Url`; in the
        latter case only its `path` attribute is kept.  Any extra
        keyword arguments are accepted and ignored.
        """
        # NOTE: the default `idfactory` is built once, when this
        # function is defined, so it is shared by every instance that
        # does not pass its own.
        self._directory = (directory.path
                           if isinstance(directory, gc3libs.url.Url)
                           else directory)
        self._protocol = protocol
        self.idfactory = idfactory
Example #10
0
    def __init__(self,
                 directory=gc3libs.Default.JOBS_DIR,
                 idfactory=IdFactory(),
                 protocol=DEFAULT_PROTOCOL,
                 **extra_args):
        """
        Record the directory, ID factory and protocol used by this store.

        `directory` may be a `Url` or a plain path.  A `Url` is passed
        unchanged to the base class constructor and its `path`
        attribute is kept as the storage directory; a plain path is
        kept as given, and the base class receives a ``file`` URL
        built from its absolute form.  Any extra keyword arguments
        are accepted and ignored.
        """
        # NOTE: the default `idfactory` is built once, when this
        # function is defined, so it is shared by every instance that
        # does not pass its own.
        given_as_url = isinstance(directory, Url)
        if given_as_url:
            store_url = directory
        else:
            store_url = Url(scheme='file', path=os.path.abspath(directory))
        super(FilesystemStore, self).__init__(store_url)

        self._directory = directory.path if given_as_url else directory
        self._protocol = protocol
        self.idfactory = idfactory
Example #11
0
File: sql.py Project: fliem/gc3pie
    def __init__(self, url, table_name="store", idfactory=None,
                 extra_fields=None, create=True, **extra_args):
        """
        Open a connection to the storage database identified by
        url. It will use the correct backend (MySQL, psql, sqlite3)
        based on the url.scheme value

        :param url: database location; ``str(url)`` is handed to
          `sqla.create_engine`.
        :param table_name: name of the table holding the stored objects.
        :param idfactory: object used to generate new IDs; a false
          value selects ``IdFactory(id_class=IntId)``.
        :param extra_fields: mapping of `sqla.Column` objects to
          functions; a copy of each column is appended to the table
          and the function is recorded under the column name.
          `None` (the default) means no extra columns.
        :param create: if true, create the table when the database
          does not already have one named `table_name`.
        :param extra_args: accepted and ignored.
        """
        super(SqlStore, self).__init__(url)

        # `None` replaces the former `{}` default: a mutable default
        # value is a single object shared by all calls.
        if extra_fields is None:
            extra_fields = {}

        self._engine = sqla.create_engine(str(url))
        self.table_name = table_name

        self.__meta = sqla.MetaData(bind=self._engine)

        # create schema
        table = sqla.Table(
            self.table_name,
            self.__meta,
            sqla.Column('id',
                        sqla.Integer(),
                        primary_key=True, nullable=False),
            sqla.Column('data',
                        sqla.LargeBinary()),
            sqla.Column('state',
                        sqla.String(length=128)))

        # create internal rep of table
        self.extra_fields = dict()
        for col, func in extra_fields.iteritems():
            assert isinstance(col, sqla.Column)
            table.append_column(col.copy())
            self.extra_fields[col.name] = func

        # check if the db exists and already has a `table_name` table
        current_metadata = sqla.MetaData(bind=self._engine)
        current_metadata.reflect()
        if create and self.table_name not in current_metadata.tables:
            self.__meta.create_all()

        self.t_store = self.__meta.tables[self.table_name]

        self.idfactory = idfactory
        if not idfactory:
            self.idfactory = IdFactory(id_class=IntId)
Example #12
0
def test_store_ctor_with_extra_arguments(cls, tmpdir):
    """Check that `Store`:class: classes tolerate foreign keyword arguments.

    The constructor `cls` is invoked with a set of keyword arguments
    that includes some which are valid only on other `Store` classes;
    the test passes if the call does not raise.
    """
    workdir = str(tmpdir)
    extra_column = sqlalchemy.Column('extra', sqlalchemy.TEXT())
    args = dict(
        url='sqlite:///%s/test.sqlite' % os.path.abspath(workdir),
        table_name='store',
        create=True,
        directory=workdir,
        idfactory=IdFactory(),
        protocol=DEFAULT_PROTOCOL,
        extra_fields={extra_column: lambda x: "test"},
    )

    cls(**args)
Example #13
0
class SqlStore(Store):
    """
    Save and load objects in a SQL db, using python's `pickle` module
    to serialize objects into a specific field.

    Access to the DB is done via SQLAlchemy module, therefore any
    driver supported by SQLAlchemy will be supported by this class.

    The `url` argument is used to access the store. It is supposed to
    be a `gc3libs.url.Url`:class: class, and therefore may contain
    username, password, host and port if they are needed by the db
    used.

    The `table_name` argument is the name of the table to create. By
    default it's ``store``.  Alternatively, the table name can be
    given in the "fragment" part of the database URL, as
    ``#table=...`` (replace ``...`` with the actual table name).  The
    constructor argument takes precedence over the table name
    specified in the DB URL.

    The table `table_name` is created if it does not exist (unless
    the `create` constructor argument is false), but if there already
    is such a table it is assumed that its schema is compatible with
    our needs.  Creation does not happen in the constructor: it is
    deferred until the DB is first accessed.  A minimal table schema
    is as follows::

        +-----------+--------------+------+-----+---------+
        | Field     | Type         | Null | Key | Default |
        +-----------+--------------+------+-----+---------+
        | id        | int(11)      | NO   | PRI | NULL    |
        | data      | blob         | YES  |     | NULL    |
        | state     | varchar(128) | YES  |     | NULL    |
        +-----------+--------------+------+-----+---------+

    The meaning of the fields is:

    - `id`: this is the id returned by the `save()` method and
      uniquely identifies a stored object.

    - `data`: serialized Python object.

    - `state`: if the object is a `Task`:class: instance, this will be
      its current execution state.

    The `extra_fields` constructor argument is used to extend the
    database. It must contain a mapping `*column*: *function*`
    where:

    - *column* is a `sqlalchemy.Column` object.

    - *function* is a function which takes the object to be saved as
      argument and returns the value to be stored into the
      database. Any exception raised by this function will be
      *ignored*.  Classes `GetAttribute`:class: and `GetItem`:class:
      in module `get`:mod: provide convenient helpers to save object
      attributes into table columns.

    For each extra column the `save()` method will call the
    corresponding *function* in order to get the correct value to
    store into the DB.

    Any extra keyword arguments are ignored for compatibility with
    `FilesystemStore`:class:.
    """
    def __init__(self,
                 url,
                 table_name=None,
                 idfactory=None,
                 extra_fields=None,
                 create=True,
                 **extra_args):
        """
        Set up a store on the database identified by `url`.

        DB backend (MySQL, psql, sqlite3) is chosen based on the
        `url.scheme` value.  No DB connection is made here; see
        `_delayed_init`.
        """
        super(SqlStore, self).__init__(url)
        if self.url.fragment:
            kv = parse_qs(self.url.fragment)
        else:
            kv = {}

        # init static public args
        if not idfactory:
            self.idfactory = IdFactory(id_class=IntId)
        else:
            self.idfactory = idfactory

        url_table_names = kv.get('table')
        if url_table_names:
            url_table_name = url_table_names[-1]  # last wins
        else:
            url_table_name = ''
        if table_name is None:
            self.table_name = url_table_name or "store"
        else:
            # Report an override only if the URL actually named a
            # (different) table; otherwise the message would be bogus.
            if url_table_name and table_name != url_table_name:
                gc3libs.log.debug(
                    "DB table name given in store URL fragment,"
                    " but overriden by `table` argument to SqlStore()")
            self.table_name = table_name

        # save ctor args for lazy-initialization
        self._init_extra_fields = (extra_fields
                                   if extra_fields is not None else {})
        self._init_create = create

        # create slots for lazy-init'ed attrs
        self._real_engine = None
        self._real_extra_fields = None
        self._real_tables = None

    @staticmethod
    def _to_sqlalchemy_url(url):
        """
        Return `url` as a string, with any fragment identifier removed.

        The result is what `_delayed_init` passes to
        `sqla.create_engine`.
        """
        if url.scheme == 'sqlite':
            # rewrite ``sqlite`` URLs to be RFC compliant, see:
            # https://github.com/uzh/gc3pie/issues/261
            db_url = "%s://%s/%s" % (url.scheme, url.netloc, url.path)
        else:
            db_url = str(url)
        # remove fragment identifier, if any: keep only what precedes
        # the first ``#``
        return db_url.split('#', 1)[0]

    def _delayed_init(self):
        """
        Perform initialization tasks that can interfere with
        forking/multiprocess setup.

        See `GC3Pie issue #550
        <https://github.com/uzh/gc3pie/issues/550>`_ for more details
        and motivation.
        """
        self._real_engine = sqla.create_engine(
            self._to_sqlalchemy_url(self.url))

        # create schema
        meta = sqla.MetaData(bind=self._real_engine)
        table = sqla.Table(
            self.table_name, meta,
            sqla.Column('id', sqla.Integer(),
                        primary_key=True, nullable=False),
            sqla.Column('data', sqla.LargeBinary()),
            sqla.Column('state', sqla.String(length=128)))

        # create internal rep of table
        self._real_extra_fields = {}
        for col, func in self._init_extra_fields.iteritems():
            assert isinstance(col, sqla.Column)
            table.append_column(col.copy())
            self._real_extra_fields[col.name] = func

        # check if the db exists and already has a `table_name` table
        current_meta = sqla.MetaData(bind=self._real_engine)
        current_meta.reflect()
        if self._init_create and self.table_name not in current_meta.tables:
            meta.create_all()

        self._real_tables = meta.tables[self.table_name]

    @property
    def _engine(self):
        """DB engine object; triggers the delayed initialization if needed."""
        if self._real_engine is None:
            self._delayed_init()
        return self._real_engine

    @property
    def _tables(self):
        """Table object for `table_name`; triggers delayed init if needed."""
        if self._real_tables is None:
            self._delayed_init()
        return self._real_tables

    # FIXME: Remove once the TissueMAPS code is updated not to use this any more!
    @property
    def t_store(self):
        """
        Deprecated compatibility alias for `SqlStore._tables`
        """
        warn(
            "`SqlStore.t_store` has been renamed to `SqlStore._tables`;"
            " please update your code", DeprecationWarning, 2)
        return self._tables

    @property
    def extra_fields(self):
        """Map of extra column names to the functions computing their value."""
        if self._real_extra_fields is None:
            self._delayed_init()
        return self._real_extra_fields

    @same_docstring_as(Store.list)
    def list(self):
        q = sql.select([self._tables.c.id])
        with closing(self._engine.connect()) as conn:
            rows = conn.execute(q)
            ids = [i[0] for i in rows.fetchall()]
        return ids

    @same_docstring_as(Store.replace)
    def replace(self, id_, obj):
        self._save_or_replace(id_, obj)

    # copied from FilesystemStore
    @same_docstring_as(Store.save)
    def save(self, obj):
        if not hasattr(obj, 'persistent_id'):
            obj.persistent_id = self.idfactory.new(obj)
        return self._save_or_replace(obj.persistent_id, obj)

    def _save_or_replace(self, id_, obj):
        """
        Write `obj` into the DB row with ID `id_` and return that ID.

        The row is inserted if no row with that ID exists, updated
        otherwise.  On success, `obj.persistent_id` is set to `id_`
        and `obj.changed` (if present) is reset to `False`.
        """
        # build row to insert/update
        fields = {'id': id_}

        with closing(StringIO()) as dstdata:
            make_pickler(self, dstdata, obj).dump(obj)
            fields['data'] = dstdata.getvalue()

        try:
            fields['state'] = obj.execution.state
        except AttributeError:
            # If we cannot determine the state of a task, consider it UNKNOWN.
            fields['state'] = Run.State.UNKNOWN

        # compute values of the extra columns; a failing function only
        # results in a warning and the column being left out of the row
        for column in self.extra_fields:
            try:
                fields[column] = self.extra_fields[column](obj)
                gc3libs.log.debug(
                    "Writing value '%s' in column '%s' for object '%s'",
                    fields[column], column, obj)
            except Exception as ex:
                gc3libs.log.warning(
                    "Error saving DB column '%s' of object '%s': %s: %s",
                    column, obj, ex.__class__.__name__, str(ex))

        with closing(self._engine.connect()) as conn:
            q = sql.select([self._tables.c.id]).where(self._tables.c.id == id_)
            r = conn.execute(q)
            if not r.fetchone():
                # It's an insert
                q = self._tables.insert().values(**fields)
                conn.execute(q)
            else:
                # it's an update
                q = self._tables.update().where(
                    self._tables.c.id == id_).values(**fields)
                conn.execute(q)
            obj.persistent_id = id_
            if hasattr(obj, 'changed'):
                obj.changed = False

        # return id
        return obj.persistent_id

    @same_docstring_as(Store.load)
    def load(self, id_):
        with closing(self._engine.connect()) as conn:
            q = sql.select([self._tables.c.data
                            ]).where(self._tables.c.id == id_)
            rawdata = conn.execute(q).fetchone()
            if not rawdata:
                raise gc3libs.exceptions.LoadError(
                    "Unable to find any object with ID '%s'" % id_)
            obj = make_unpickler(self, StringIO(rawdata[0])).load()
        super(SqlStore, self)._update_to_latest_schema()
        return obj

    @same_docstring_as(Store.remove)
    def remove(self, id_):
        with closing(self._engine.connect()) as conn:
            conn.execute(self._tables.delete().where(self._tables.c.id == id_))
Example #14
0
File: sql.py Project: fliem/gc3pie
class SqlStore(Store):

    """
    Save and load objects in a SQL db, using python's `pickle` module
    to serialize objects into a specific field.

    Access to the DB is done via SQLAlchemy module, therefore any
    driver supported by SQLAlchemy will be supported by this class.

    The `url` argument is used to access the store. It is supposed to
    be a `gc3libs.url.Url`:class: class, and therefore may contain
    username, password, host and port if they are needed by the db
    used.

    The `table_name` argument is the name of the table to create. By
    default it's ``store``.

    The constructor will create the `table_name` table if it does not
    exist, but if there already is such a table it will assume that
    its schema is compatible with our needs. A minimal table schema
    is as follows::

        +-----------+--------------+------+-----+---------+
        | Field     | Type         | Null | Key | Default |
        +-----------+--------------+------+-----+---------+
        | id        | int(11)      | NO   | PRI | NULL    |
        | data      | blob         | YES  |     | NULL    |
        | state     | varchar(128) | YES  |     | NULL    |
        +-----------+--------------+------+-----+---------+

    The meaning of the fields is:

    - `id`: this is the id returned by the `save()` method and
      uniquely identifies a stored object.

    - `data`: the serialization of the object.

    - `state`: if the object is a `Task` instance, this will be the
      current execution state of the job.

    The `extra_fields` argument is used to extend the database. It
    must contain a mapping `<column>` : `<function>` where:

    - `<column>` is a `sqlalchemy.Column` object.

    - `<function>` is a function which takes the object to be saved
      as argument and returns the value to be stored into the
      database. Any exception raised by this function will be
      *ignored*.  Classes `GetAttribute`:class: and `GetItem`:class:
      in module `get`:mod: provide convenient helpers to save object
      attributes into table columns.

    For each extra column the `save()` method will call the
    corresponding `<function>` in order to get the correct value to
    store into the db.

    Any extra keyword arguments are ignored for compatibility with
    `FilesystemStore`.

    """

    def __init__(self, url, table_name="store", idfactory=None,
                 extra_fields=None, create=True, **extra_args):
        """
        Open a connection to the storage database identified by
        url. It will use the correct backend (MySQL, psql, sqlite3)
        based on the url.scheme value
        """
        super(SqlStore, self).__init__(url)

        # `None` replaces the former `{}` default: a mutable default
        # value is a single object shared by all calls.
        if extra_fields is None:
            extra_fields = {}

        self._engine = sqla.create_engine(str(url))
        self.table_name = table_name

        self.__meta = sqla.MetaData(bind=self._engine)

        # create schema
        table = sqla.Table(
            self.table_name,
            self.__meta,
            sqla.Column('id',
                        sqla.Integer(),
                        primary_key=True, nullable=False),
            sqla.Column('data',
                        sqla.LargeBinary()),
            sqla.Column('state',
                        sqla.String(length=128)))

        # create internal rep of table
        self.extra_fields = dict()
        for col, func in extra_fields.iteritems():
            assert isinstance(col, sqla.Column)
            table.append_column(col.copy())
            self.extra_fields[col.name] = func

        # check if the db exists and already has a `table_name` table
        current_metadata = sqla.MetaData(bind=self._engine)
        current_metadata.reflect()
        if create and self.table_name not in current_metadata.tables:
            self.__meta.create_all()

        self.t_store = self.__meta.tables[self.table_name]

        self.idfactory = idfactory
        if not idfactory:
            self.idfactory = IdFactory(id_class=IntId)

    @same_docstring_as(Store.list)
    def list(self):
        q = sql.select([self.t_store.c.id])
        conn = self._engine.connect()
        try:
            rows = conn.execute(q)
            ids = [i[0] for i in rows.fetchall()]
        finally:
            # release the connection even if the query fails
            conn.close()
        return ids

    @same_docstring_as(Store.replace)
    def replace(self, id_, obj):
        self._save_or_replace(id_, obj)

    # copied from FilesystemStore
    @same_docstring_as(Store.save)
    def save(self, obj):
        if not hasattr(obj, 'persistent_id'):
            obj.persistent_id = self.idfactory.new(obj)
        return self._save_or_replace(obj.persistent_id, obj)

    def _save_or_replace(self, id_, obj):
        """
        Write `obj` into the DB row with ID `id_` and return that ID.

        The row is inserted if no row with that ID exists, updated
        otherwise.  On success, `obj.persistent_id` is set to `id_`
        and `obj.changed` (if present) is reset to `False`.
        """
        fields = {'id': id_}

        dstdata = StringIO.StringIO()
        pickler = make_pickler(self, dstdata, obj)
        pickler.dump(obj)
        fields['data'] = dstdata.getvalue()

        try:
            fields['state'] = obj.execution.state
        except AttributeError:
            # If we cannot determine the state of a task, consider it UNKNOWN.
            fields['state'] = Run.State.UNKNOWN

        # compute values of the extra columns; a failing function only
        # results in a warning and the column being left out of the row
        for column in self.extra_fields:
            try:
                fields[column] = self.extra_fields[column](obj)
                gc3libs.log.debug(
                    "Writing value '%s' in column '%s' for object '%s'",
                    fields[column], column, obj)
            except Exception as ex:
                gc3libs.log.warning(
                    "Error saving DB column '%s' of object '%s': %s: %s",
                    column, obj, ex.__class__.__name__, str(ex))

        q = sql.select([self.t_store.c.id]).where(self.t_store.c.id == id_)
        conn = self._engine.connect()
        try:
            r = conn.execute(q)
            if not r.fetchone():
                # It's an insert
                q = self.t_store.insert().values(**fields)
                conn.execute(q)
            else:
                # it's an update
                q = self.t_store.update().where(
                    self.t_store.c.id == id_).values(**fields)
                conn.execute(q)
            obj.persistent_id = id_
            if hasattr(obj, 'changed'):
                obj.changed = False
        finally:
            # release the connection even if a statement fails
            conn.close()

        # return id
        return obj.persistent_id

    @same_docstring_as(Store.load)
    def load(self, id_):
        q = sql.select([self.t_store.c.data]).where(self.t_store.c.id == id_)
        conn = self._engine.connect()
        try:
            r = conn.execute(q)
            rawdata = r.fetchone()
            if not rawdata:
                raise gc3libs.exceptions.LoadError(
                    "Unable to find any object with ID '%s'" % id_)
            unpickler = make_unpickler(self, StringIO.StringIO(rawdata[0]))
            obj = unpickler.load()
        finally:
            # also reached when `LoadError` is raised above, which
            # formerly left the connection open
            conn.close()

        super(SqlStore, self)._update_to_latest_schema()
        return obj

    @same_docstring_as(Store.remove)
    def remove(self, id_):
        conn = self._engine.connect()
        try:
            conn.execute(self.t_store.delete().where(self.t_store.c.id == id_))
        finally:
            conn.close()
Example #15
0
class SqlStore(Store):

    """
    Save and load objects in a SQL db, using python's `pickle` module
    to serialize objects into a specific field.

    Access to the DB is done via SQLAlchemy module, therefore any
    driver supported by SQLAlchemy will be supported by this class.

    The `url` argument is used to access the store. It is supposed to
    be a `gc3libs.url.Url`:class: class, and therefore may contain
    username, password, host and port if they are needed by the db
    used.

    The `table_name` argument is the name of the table to create. By
    default it's ``store``.

    The table `table_name` is created if it does not exist (unless
    the `create` constructor argument is false), but if there already
    is such a table it is assumed that its schema is compatible with
    our needs.  Creation does not happen in the constructor: it is
    deferred until the DB is first accessed.  A minimal table schema
    is as follows::

        +-----------+--------------+------+-----+---------+
        | Field     | Type         | Null | Key | Default |
        +-----------+--------------+------+-----+---------+
        | id        | int(11)      | NO   | PRI | NULL    |
        | data      | blob         | YES  |     | NULL    |
        | state     | varchar(128) | YES  |     | NULL    |
        +-----------+--------------+------+-----+---------+

    The meaning of the fields is:

    - `id`: this is the id returned by the `save()` method and
      uniquely identifies a stored object.

    - `data`: serialized Python object.

    - `state`: if the object is a `Task`:class: instance, this will be
      its current execution state.

    The `extra_fields` constructor argument is used to extend the
    database. It must contain a mapping `*column*: *function*`
    where:

    - *column* is a `sqlalchemy.Column` object.

    - *function* is a function which takes the object to be saved as
      argument and returns the value to be stored into the
      database. Any exception raised by this function will be
      *ignored*.  Classes `GetAttribute`:class: and `GetItem`:class:
      in module `get`:mod: provide convenient helpers to save object
      attributes into table columns.

    For each extra column the `save()` method will call the
    corresponding *function* in order to get the correct value to
    store into the DB.

    Any extra keyword arguments are ignored for compatibility with
    `FilesystemStore`:class:.
    """

    def __init__(self, url, table_name="store", idfactory=None,
                 extra_fields=None, create=True, **extra_args):
        """
        Set up a store on the database identified by `url`.

        DB backend (MySQL, psql, sqlite3) is chosen based on the
        `url.scheme` value.  No DB connection is made here; see
        `_delayed_init`.
        """
        super(SqlStore, self).__init__(url)

        # init static public args
        if not idfactory:
            self.idfactory = IdFactory(id_class=IntId)
        else:
            self.idfactory = idfactory
        self.table_name = table_name

        # save ctor args for lazy-initialization
        self._init_extra_fields = (extra_fields if extra_fields is not None else {})
        self._init_create = create

        # create slots for lazy-init'ed attrs
        self._real_engine = None
        self._real_extra_fields = None
        self._real_tables = None

    def _delayed_init(self):
        """
        Perform initialization tasks that can interfere with
        forking/multiprocess setup.

        See `GC3Pie issue #550
        <https://github.com/uzh/gc3pie/issues/550>`_ for more details
        and motivation.
        """
        self._real_engine = sqla.create_engine(str(self.url))

        # create schema
        meta = sqla.MetaData(bind=self._real_engine)
        table = sqla.Table(
            self.table_name,
            meta,
            sqla.Column('id',
                        sqla.Integer(),
                        primary_key=True, nullable=False),
            sqla.Column('data',
                        sqla.LargeBinary()),
            sqla.Column('state',
                        sqla.String(length=128)))

        # create internal rep of table
        self._real_extra_fields = {}
        for col, func in self._init_extra_fields.iteritems():
            assert isinstance(col, sqla.Column)
            table.append_column(col.copy())
            self._real_extra_fields[col.name] = func

        # check if the db exists and already has a `table_name` table
        current_meta = sqla.MetaData(bind=self._real_engine)
        current_meta.reflect()
        if self._init_create and self.table_name not in current_meta.tables:
            meta.create_all()

        self._real_tables = meta.tables[self.table_name]

    @property
    def _engine(self):
        """DB engine object; triggers the delayed initialization if needed."""
        if self._real_engine is None:
            self._delayed_init()
        return self._real_engine

    @property
    def _tables(self):
        """Table object for `table_name`; triggers delayed init if needed."""
        if self._real_tables is None:
            self._delayed_init()
        return self._real_tables

    @property
    def extra_fields(self):
        """Map of extra column names to the functions computing their value."""
        if self._real_extra_fields is None:
            self._delayed_init()
        return self._real_extra_fields

    @same_docstring_as(Store.list)
    def list(self):
        q = sql.select([self._tables.c.id])
        # use `closing` like the other methods do, so the connection
        # is released even if the query fails
        with closing(self._engine.connect()) as conn:
            rows = conn.execute(q)
            ids = [i[0] for i in rows.fetchall()]
        return ids

    @same_docstring_as(Store.replace)
    def replace(self, id_, obj):
        self._save_or_replace(id_, obj)

    # copied from FilesystemStore
    @same_docstring_as(Store.save)
    def save(self, obj):
        if not hasattr(obj, 'persistent_id'):
            obj.persistent_id = self.idfactory.new(obj)
        return self._save_or_replace(obj.persistent_id, obj)

    def _save_or_replace(self, id_, obj):
        """
        Write `obj` into the DB row with ID `id_` and return that ID.

        The row is inserted if no row with that ID exists, updated
        otherwise.  On success, `obj.persistent_id` is set to `id_`
        and `obj.changed` (if present) is reset to `False`.
        """
        # build row to insert/update
        fields = {'id': id_}

        with closing(StringIO()) as dstdata:
            make_pickler(self, dstdata, obj).dump(obj)
            fields['data'] = dstdata.getvalue()

        try:
            fields['state'] = obj.execution.state
        except AttributeError:
            # If we cannot determine the state of a task, consider it UNKNOWN.
            fields['state'] = Run.State.UNKNOWN

        # compute values of the extra columns; a failing function only
        # results in a warning and the column being left out of the row
        for column in self.extra_fields:
            try:
                fields[column] = self.extra_fields[column](obj)
                gc3libs.log.debug(
                    "Writing value '%s' in column '%s' for object '%s'",
                    fields[column], column, obj)
            except Exception as ex:
                gc3libs.log.warning(
                    "Error saving DB column '%s' of object '%s': %s: %s",
                    column, obj, ex.__class__.__name__, str(ex))

        with closing(self._engine.connect()) as conn:
            q = sql.select([self._tables.c.id]).where(self._tables.c.id == id_)
            r = conn.execute(q)
            if not r.fetchone():
                # It's an insert
                q = self._tables.insert().values(**fields)
                conn.execute(q)
            else:
                # it's an update
                q = self._tables.update().where(
                    self._tables.c.id == id_).values(**fields)
                conn.execute(q)
            obj.persistent_id = id_
            if hasattr(obj, 'changed'):
                obj.changed = False

        # return id
        return obj.persistent_id

    @same_docstring_as(Store.load)
    def load(self, id_):
        with closing(self._engine.connect()) as conn:
            q = sql.select([self._tables.c.data]).where(self._tables.c.id == id_)
            rawdata = conn.execute(q).fetchone()
            if not rawdata:
                raise gc3libs.exceptions.LoadError(
                    "Unable to find any object with ID '%s'" % id_)
            obj = make_unpickler(self, StringIO(rawdata[0])).load()
        super(SqlStore, self)._update_to_latest_schema()
        return obj

    @same_docstring_as(Store.remove)
    def remove(self, id_):
        with closing(self._engine.connect()) as conn:
            conn.execute(self._tables.delete().where(self._tables.c.id == id_))
Example #16
0
class SqlStore(Store):
    """
    Save and load objects in a SQL db, using python's `pickle` module
    to serialize objects into a specific field.

    Access to the DB is done via SQLAlchemy module, therefore any
    driver supported by SQLAlchemy will be supported by this class.

    The `url` argument is used to access the store. It is supposed to
    be a `gc3libs.url.Url`:class: instance, and therefore may contain
    username, password, host and port if they are needed by the db
    used.

    The `table_name` argument is the name of the table to create. By
    default it's ``store``.

    The constructor will create the `table_name` table if it does not
    exist (and `create` is true), but if there already is such a table
    it will assume that its schema is compatible with our needs. A
    minimal table schema is as follows::

        +-----------+--------------+------+-----+---------+
        | Field     | Type         | Null | Key | Default |
        +-----------+--------------+------+-----+---------+
        | id        | int(11)      | NO   | PRI | NULL    |
        | data      | blob         | YES  |     | NULL    |
        | state     | varchar(128) | YES  |     | NULL    |
        +-----------+--------------+------+-----+---------+

    The meaning of the fields is:

    `id`: this is the id returned by the `save()` method and
    uniquely identifies a stored object.

    `data`: the serialization of the object.

    `state`: the value of `obj.execution.state` (e.g., if the object
    is a `Task` instance this will be the current execution state of
    the job); `Run.State.UNKNOWN` is stored when the object has no
    such attribute.

    The `extra_fields` argument is used to extend the database. It
    must contain a mapping `<column>` : `<function>` where:

    `<column>` is a `sqlalchemy.Column` object.

    `<function>` is a function which takes the object to be saved as
    argument and returns the value to be stored into the database. Any
    exception raised by this function will be logged as a warning and
    otherwise *ignored*.  Classes `GetAttribute`:class: and
    `GetItem`:class: in module `get`:mod: provide convenient helpers
    to save object attributes into table columns.

    For each extra column the `save()` method will call the
    corresponding `<function>` in order to get the correct value to
    store into the db.

    Any extra keyword arguments are ignored for compatibility with
    `FilesystemStore`.

    """
    def __init__(self,
                 url,
                 table_name="store",
                 idfactory=None,
                 extra_fields=None,
                 create=True,
                 **extra_args):
        """
        Open a connection to the storage database identified by
        `url`. It will use the correct backend (MySQL, psql, sqlite3)
        based on the `url.scheme` value.

        :param url: location of the database; converted with `str()`
            and handed to SQLAlchemy's `create_engine`.
        :param str table_name: name of the table holding the objects.
        :param idfactory: object used by `save()` to generate new
            persistent IDs; when false-ish, an `IdFactory` producing
            `IntId` values is used.
        :param extra_fields: optional mapping from `sqlalchemy.Column`
            objects to one-argument functions computing the column
            value from the object being saved; `None` (the default)
            means no extra columns.
        :param bool create: if true, create the table when no table
            named `table_name` is found in the database.
        :param extra_args: ignored.
        """
        # `None` stands in for "no extra columns"; this avoids sharing
        # one mutable default dictionary among all calls.
        if extra_fields is None:
            extra_fields = {}

        self._engine = sqla.create_engine(str(url))
        self.table_name = table_name

        self.__meta = sqla.MetaData(bind=self._engine)

        # create schema
        table = sqla.Table(
            self.table_name, self.__meta,
            sqla.Column('id', sqla.Integer(),
                        primary_key=True, nullable=False),
            sqla.Column('data', sqla.LargeBinary()),
            sqla.Column('state', sqla.String(length=128)))

        # create internal rep of table: each extra column is appended
        # (as a copy) to the table definition, and its value-computing
        # function is remembered under the column name
        self.extra_fields = dict()
        for col, func in extra_fields.items():
            assert isinstance(col, sqla.Column)
            table.append_column(col.copy())
            self.extra_fields[col.name] = func

        # check if the db exists and already has a table with our name;
        # a separate `MetaData` object is used for reflection so the
        # schema declared above is left untouched
        current_metadata = sqla.MetaData(bind=self._engine)
        current_metadata.reflect()
        if create and self.table_name not in current_metadata.tables:
            self.__meta.create_all()

        self.t_store = self.__meta.tables[self.table_name]

        self.idfactory = idfactory
        if not idfactory:
            self.idfactory = IdFactory(id_class=IntId)

    @same_docstring_as(Store.list)
    def list(self):
        # Return the value of the `id` column of every row in the table.
        q = sql.select([self.t_store.c.id])
        conn = self._engine.connect()
        try:
            rows = conn.execute(q)
            return [i[0] for i in rows.fetchall()]
        finally:
            # release the connection even if the query fails
            conn.close()

    @same_docstring_as(Store.replace)
    def replace(self, id_, obj):
        self._save_or_replace(id_, obj)

    # copied from FilesystemStore
    @same_docstring_as(Store.save)
    def save(self, obj):
        # Objects saved for the first time get a fresh ID from the factory.
        if not hasattr(obj, 'persistent_id'):
            obj.persistent_id = self.idfactory.new(obj)
        return self._save_or_replace(obj.persistent_id, obj)

    def _save_or_replace(self, id_, obj):
        """
        Pickle `obj` and write it into the row identified by `id_`.

        A new row is inserted if none with that ID exists, otherwise
        the existing row is updated.  Besides the pickled data, the
        row gets the `state` column and the value of every configured
        extra column.

        On success, `obj.persistent_id` is set to `id_` and, if `obj`
        has a `changed` attribute, it is reset to `False`.

        :return: the persistent ID of the saved object.
        """
        fields = {'id': id_}

        dstdata = StringIO.StringIO()
        pickler = make_pickler(self, dstdata, obj)
        pickler.dump(obj)
        fields['data'] = dstdata.getvalue()

        try:
            fields['state'] = obj.execution.state
        except AttributeError:
            # If we cannot determine the state of a task, consider it UNKNOWN.
            fields['state'] = Run.State.UNKNOWN

        # compute values of the extra columns; a failing function only
        # causes its column to be left out of the SQL statement
        for column, compute_value in self.extra_fields.items():
            try:
                fields[column] = compute_value(obj)
                gc3libs.log.debug(
                    "Writing value '%s' in column '%s' for object '%s'",
                    fields[column], column, obj)
            except Exception as ex:
                gc3libs.log.warning(
                    "Error saving DB column '%s' of object '%s': %s: %s",
                    column, obj, ex.__class__.__name__, str(ex))

        q = sql.select([self.t_store.c.id]).where(self.t_store.c.id == id_)
        conn = self._engine.connect()
        try:
            r = conn.execute(q)
            if not r.fetchone():
                # It's an insert
                q = self.t_store.insert().values(**fields)
            else:
                # it's an update
                q = self.t_store.update().where(
                    self.t_store.c.id == id_).values(**fields)
            conn.execute(q)
            obj.persistent_id = id_
            if hasattr(obj, 'changed'):
                obj.changed = False
        finally:
            # release the connection even if a statement fails
            conn.close()

        # return id
        return obj.persistent_id

    @same_docstring_as(Store.load)
    def load(self, id_):
        q = sql.select([self.t_store.c.data]).where(self.t_store.c.id == id_)
        conn = self._engine.connect()
        try:
            rawdata = conn.execute(q).fetchone()
            if not rawdata:
                raise gc3libs.exceptions.LoadError(
                    "Unable to find any object with ID '%s'" % id_)
            # NOTE: the stored payload is unpickled, so the contents of
            # the database must be trusted.
            unpickler = make_unpickler(self, StringIO.StringIO(rawdata[0]))
            return unpickler.load()
        finally:
            # release the connection on every path, including the
            # `LoadError` raised above and unpickling failures
            conn.close()

    @same_docstring_as(Store.remove)
    def remove(self, id_):
        conn = self._engine.connect()
        try:
            conn.execute(
                self.t_store.delete().where(self.t_store.c.id == id_))
        finally:
            # release the connection even if the DELETE fails
            conn.close()