Beispiel #1
0
 def insert(self, objects, session=None, table=None):
     '''
     Insert a batch of objects into a table.

     :param objects: array of objects to insert, or a Mapping whose
                     values are the objects
     :param session: handed through to ``_exec_transaction``
     :param table: table reference, resolved via ``get_table``
     '''
     if isinstance(objects, Mapping):
         objects = objects.values()
     is_array(objects, 'objects must be a list')
     table = self.get_table(table)
     if not self._lock_required:
         # no locking requested; run the transaction directly
         self._exec_transaction(cmd=table.insert(), params=objects,
                                session=session)
         return
     # run the transaction inside the LockFile context for the sqlite path
     with LockFile(self._sqlite_path):
         self._exec_transaction(cmd=table.insert(), params=objects,
                                session=session)
Beispiel #2
0
 def insert(self, objects, session=None, table=None):
     '''
     Insert a batch of objects into a table.

     :param objects: array of objects to insert, or a Mapping whose
                     values are the objects
     :param session: handed through to ``_exec_transaction``
     :param table: table reference, resolved via ``get_table``
     '''
     rows = objects.values() if isinstance(objects, Mapping) else objects
     is_array(rows, 'objects must be a list')
     target = self.get_table(table)

     def _run():
         # the insert statement is built at call time, i.e. inside the
         # lock whenever one is taken
         self._exec_transaction(cmd=target.insert(),
                                params=rows,
                                session=session)

     if self._lock_required:
         with LockFile(self._sqlite_path):
             _run()
     else:
         _run()
Beispiel #3
0
 def _delta_force(self, force=None, last_update=None):
     '''
     Work out which object ids should be processed and whether the
     delta timestamp should be saved afterwards.

     :param force: an array of oids, True, a single oid, or a falsy
                   value; a falsy value falls back to the 'force'
                   config option
     :param last_update: handed to ``_fetch_mtime`` when the
                         'delta_mtime' config option is enabled
     :returns: tuple of (sorted unique oids, save_delta_ts)
     '''
     force = force or self.lconfig.get('force') or False
     container = self.container
     if is_array(force):
         # caller named the oids explicitly
         selected = list(force)
         save_delta_ts = False
     elif not force:
         selected = []
         save_delta_ts = True
         if self.lconfig.get('delta_new_ids', True):
             # add all new (unknown) oids
             selected.extend(self.get_new_oids())
         if self.lconfig.get('delta_mtime', False):
             # add only those oids that changed since the last update
             last_update = self._fetch_mtime(last_update)
             selected.extend(self.get_changed_oids(last_update))
     elif force is True or not container.exists():
         # forced, or the container doesn't exist:
         # take every known object id
         selected = self.sql_get_oids()
         save_delta_ts = True
     else:
         # any other truthy value is treated as a single oid
         selected = [force]
         save_delta_ts = False
     logger.debug("Delta Size: %s" % len(selected))
     return sorted(set(selected)), save_delta_ts
Beispiel #4
0
    def _generate_sql(self, _oids=None, sort=True):
        '''
        Build the SELECT statement for the configured table from the
        SQL based entries in ``self.fields``.

        :param _oids: optional iterable of oids; when non-empty the
                      query is restricted to them with a WHERE ... in
                      clause
        :param sort: when true, order the result by the oid column,
                     ascending
        :returns: the SQL string with runs of whitespace collapsed
        :raises ValueError: if db, table or _oid is missing in config
        '''
        db = self.lconfig.get('db_schema_name') or self.lconfig.get('db')
        _oid = self.lconfig.get('_oid')
        if is_array(_oid):
            _oid = _oid[0]  # get the db column, not the field alias
        table = self.lconfig.get('table')

        if not all((_oid, table, db)):
            raise ValueError("Must define db, table, _oid in config!")
        selects = []
        stmts = []
        for as_field, opts in self.fields.iteritems():
            select = opts.get('select')
            if not select:
                # not a SQL based field
                continue
            selects.append('%s as %s' % (select, as_field))
            # raw string pattern: '\s' in a plain literal is an invalid
            # escape sequence
            extra = re.sub(r'\s+', ' ', opts.get('sql') or '')
            if extra:
                stmts.append(extra)

        sql = 'SELECT %s FROM %s.%s %s' % (', '.join(selects), db, table,
                                           ' '.join(stmts))
        if _oids:
            sql += ' WHERE %s.%s in (%s)' % (table, _oid,
                                             ','.join(map(str, _oids)))
        if sort:
            sql += " ORDER BY %s.%s ASC" % (table, _oid)
        return re.sub(r'\s+', ' ', sql)
Beispiel #5
0
    def _generate_sql(self, _oids=None, sort=True):
        '''
        Assemble the SELECT statement for the configured table out of
        the SQL based entries in ``self.fields``.

        :param _oids: optional iterable of oids; when non-empty the
                      query is restricted to them with a WHERE ... in
                      clause
        :param sort: when true, order the result by the oid column,
                     ascending
        :returns: the SQL string with runs of whitespace collapsed
        :raises ValueError: if db, table or _oid is missing in config
        '''
        db = self.lconfig.get('db_schema_name') or self.lconfig.get('db')
        _oid = self.lconfig.get('_oid')
        if is_array(_oid):
            _oid = _oid[0]  # get the db column, not the field alias
        table = self.lconfig.get('table')

        if not all((_oid, table, db)):
            raise ValueError("Must define db, table, _oid in config!")

        # raw string pattern: '\s' in a plain literal is an invalid
        # escape sequence
        whitespace = r'\s+'
        selects = []
        stmts = []
        for as_field, opts in self.fields.iteritems():
            select = opts.get('select')
            if not select:
                # not a SQL based field
                continue
            selects.append('%s as %s' % (select, as_field))
            fragment = re.sub(whitespace, ' ', opts.get('sql') or '')
            if fragment:
                stmts.append(fragment)

        selects = ', '.join(selects)
        stmts = ' '.join(stmts)
        sql = 'SELECT %s FROM %s.%s %s' % (selects, db, table, stmts)
        if _oids:
            sql += ' WHERE %s.%s in (%s)' % (table, _oid, ','.join(
                map(str, _oids)))
        if sort:
            sql += " ORDER BY %s.%s ASC" % (table, _oid)
        return re.sub(whitespace, ' ', sql)
Beispiel #6
0
 def _delta_force(self, force=None, last_update=None):
     '''
     Decide which object ids to process and whether the delta
     timestamp should be saved afterwards.

     :param force: an array of oids, True, a single oid, or a falsy
                   value; a falsy value falls back to the 'force'
                   config option
     :param last_update: handed to ``_fetch_mtime`` when the
                         'delta_mtime' config option is enabled
     :returns: tuple of (sorted unique oids, save_delta_ts)
     '''
     force = force or self.lconfig.get('force') or False
     cube = self.container
     if is_array(force):
         # explicit oids: use them as given
         wanted = list(force)
         remember_ts = False
     elif force:
         # True, or a missing container, means "all known object ids";
         # any other truthy value is taken as a single oid
         remember_ts = force is True or not cube.exists()
         wanted = self.sql_get_oids() if remember_ts else [force]
     else:
         remember_ts = True
         wanted = []
         if self.lconfig.get('delta_new_ids', True):
             # all new (unknown) oids
             wanted.extend(self.get_new_oids())
         if self.lconfig.get('delta_mtime', False):
             # only those oids that changed since the last update
             last_update = self._fetch_mtime(last_update)
             wanted.extend(self.get_changed_oids(last_update))
     logger.debug("Delta Size: %s" % len(wanted))
     return sorted(set(wanted)), remember_ts
Beispiel #7
0
 def _prep_objects(self, objects):
     '''
     Copy each object's configured oid field into its '_oid' key.

     :param objects: mutable, indexable sequence of dict-like objects;
                     it is updated in place
     :returns: the same ``objects`` sequence
     '''
     oid_field = self.lconfig.get('_oid')
     if is_array(oid_field):
         # second element is the field name, not the actual db column
         oid_field = oid_field[1]
     for position, item in enumerate(objects):
         item['_oid'] = item[oid_field]
         objects[position] = item
     return objects
Beispiel #8
0
 def _prep_objects(self, objects):
     '''
     Map the configured oid field of every object onto '_oid'.

     :param objects: mutable, indexable sequence of dict-like objects;
                     it is updated in place
     :returns: the same ``objects`` sequence
     '''
     source_key = self.lconfig.get('_oid')
     # an array config holds (db column, field name); use the field name
     source_key = source_key[1] if is_array(source_key) else source_key
     for idx, record in enumerate(objects):
         record['_oid'] = record[source_key]
         objects[idx] = record
     return objects
Beispiel #9
0
 def _update(self, objects):
     '''
     Add every given object to this instance via ``self.add``.

     :param objects: a null value (nothing is added), an array of
                     objects, or a MetriqueContainer whose values
                     are added
     :raises ValueError: for any other kind of input
     '''
     if is_null(objects):
         return
     if is_array(objects, except_=False):
         # snapshot the input into a tuple before adding anything
         for item in tuple(objects):
             self.add(item)
         return
     if isinstance(objects, MetriqueContainer):
         for item in objects.itervalues():
             self.add(item)
         return
     raise ValueError(
         "objs must be None, a list, tuple, dict or MetriqueContainer")
Beispiel #10
0
 def _update(self, objects):
     '''
     Feed the given objects to ``self.add`` one by one.

     :param objects: a null value (nothing is added), an array of
                     objects, or a MetriqueContainer whose values
                     are added
     :raises ValueError: for any other kind of input
     '''
     if is_null(objects):
         # nothing to add
         pass
     elif is_array(objects, except_=False):
         # iterate over a tuple copy of the input
         for entry in tuple(objects):
             self.add(entry)
     elif isinstance(objects, MetriqueContainer):
         for entry in objects.itervalues():
             self.add(entry)
     else:
         message = "objs must be None, a list, tuple, dict or MetriqueContainer"
         raise ValueError(message)
Beispiel #11
0
 def sql_get_oids(self, where=None):
     '''
     Query the source database for the distinct oids of the
     configured table.

     :param where: optional condition string, or iterable of
                   condition strings which are OR'ed together
     :returns: sorted list of oids
     '''
     table = self.lconfig.get('table')
     db = self.lconfig.get('db_schema_name') or self.lconfig.get('db')
     _oid = self.lconfig.get('_oid')
     if is_array(_oid):
         # first element is the db column, not the field alias
         _oid = _oid[0]
     query = 'SELECT DISTINCT %s.%s FROM %s.%s' % (table, _oid, db, table)
     if where:
         if isinstance(where, basestring):
             conditions = [where]
         else:
             conditions = list(where)
         query = query + ' WHERE %s' % ' OR '.join(conditions)
     found = [row[_oid] for row in self._load_sql(query)]
     found.sort()
     return found
Beispiel #12
0
 def sql_get_oids(self, where=None):
     '''
     Fetch the distinct oids of the configured table from the
     source database.

     :param where: optional condition string, or iterable of
                   condition strings which are OR'ed together
     :returns: sorted list of oids
     '''
     table = self.lconfig.get('table')
     db = self.lconfig.get('db_schema_name') or self.lconfig.get('db')
     _oid = self.lconfig.get('_oid')
     # an array config holds (db column, field alias); use the column
     _oid = _oid[0] if is_array(_oid) else _oid
     parts = ['SELECT DISTINCT %s.%s FROM %s.%s' % (table, _oid, db, table)]
     if where:
         # a single string counts as one condition
         clauses = [where] if isinstance(where, basestring) else list(where)
         parts.append(' WHERE %s' % ' OR '.join(clauses))
     statement = ''.join(parts)
     return sorted(row[_oid] for row in self._load_sql(statement))
Beispiel #13
0
    def get_last_field(self, field, table=None):
        '''Query for the value of a field, sorted by that field in
        descending order and limited to a single result.

        :param field: field name (or array of field names) to query
        :param table: table reference, resolved via ``get_table``;
                      when it resolves to None the result is None
        '''
        if not is_array(field):
            field = [field]
        table = self.get_table(table, except_=False)
        last = None
        if table is not None:
            is_defined(field, 'field must be defined!')
            last = self.find(table=table, fields=field, sort=field,
                             scalar=True, descending=True, limit=1,
                             date='~', default_fields=False)
        logger.debug("last %s.%s: %s" % (table, list2str(field), last))
        return last
Beispiel #14
0
 def _index_default_name(self, columns, name=None):
     '''
     Build the name of an index; the result always carries exactly
     one leading 'ix_' prefix.

     :param columns: column name, or array of column names, used
                     together with the configured table name when
                     no explicit name is given
     :param name: explicit index name; takes precedence over columns
     :raises ValueError: if no name can be derived from columns
     '''
     table = self.config.get('table')
     is_defined(table, 'table must be defined!')
     if name:
         base = name
     elif isinstance(columns, basestring):
         base = '%s_%s' % (table, columns)
     elif is_array(columns, except_=False):
         base = '%s_%s' % (table, '_'.join(tuple(columns)))
     else:
         raise ValueError("unable to get default name from columns: %s" %
                          columns)
     # strip an existing ix_ prefix before adding it, so it never doubles
     return 'ix_%s' % re.sub('^ix_', '', base)
Beispiel #15
0
 def _index_default_name(self, columns, name=None):
     '''
     Derive an index name that carries exactly one leading 'ix_'
     prefix.

     :param columns: column name, or array of column names, used
                     together with the configured table name when
                     no explicit name is given
     :param name: explicit index name; takes precedence over columns
     :raises ValueError: if no name can be derived from columns
     '''
     table = self.config.get('table')
     is_defined(table, 'table must be defined!')
     if not name:
         if isinstance(columns, basestring):
             name = '%s_%s' % (table, columns)
         elif is_array(columns, except_=False):
             joined = '_'.join(tuple(columns))
             name = '%s_%s' % (table, joined)
         else:
             raise ValueError(
                 "unable to get default name from columns: %s" % columns)
     # normalise: drop any ix_ prefix already present, then add it back
     unprefixed = re.sub('^ix_', '', name)
     return 'ix_%s' % unprefixed
Beispiel #16
0
    def get_new_oids(self):
        '''
        Return the oids in the source database that are greater than
        the last '_oid' stored in the container.

        When the container reports no (or a falsy) last '_oid', an
        empty list is returned and the source is not queried.
        '''
        table = self.lconfig.get('table')
        _oid = self.lconfig.get('_oid')
        if is_array(_oid):
            # first element is the db column, not the field alias
            _oid = _oid[0]
        last_id = self.container.get_last_field(field='_oid')
        if not last_id:
            return []
        try:
            # numeric ids are compared unquoted (as a float value)
            last_id = float(last_id)
            template = "%s.%s > %s"
        except (TypeError, ValueError):
            # anything else is compared as a quoted string value
            template = "%s.%s > '%s'"
        where = template % (table, _oid, last_id)
        return self.sql_get_oids(where)
Beispiel #17
0
    def get_new_oids(self):
        '''
        Return the source database oids that are greater than the
        last '_oid' stored in the container.

        An empty list is returned, without querying the source, when
        the container reports no (or a falsy) last '_oid'.
        '''
        table = self.lconfig.get('table')
        _oid = self.lconfig.get('_oid')
        if is_array(_oid):
            # first element is the db column, not the field alias
            _oid = _oid[0]
        newest = self.container.get_last_field(field='_oid')
        if not newest:
            return []
        try:
            newest = float(newest)
        except (TypeError, ValueError):
            # not convertible to float: compare as a quoted string value
            where = "%s.%s > '%s'" % (table, _oid, newest)
        else:
            # numeric id: compare unquoted, using the float value
            where = "%s.%s > %s" % (table, _oid, newest)
        return self.sql_get_oids(where)
Beispiel #18
0
    def get_last_field(self, field, table=None):
        '''Look up the value of a field, sorted by that field in
        descending order and limited to a single result.

        :param field: field name (or array of field names) to query
        :param table: table reference, resolved via ``get_table``;
                      when it resolves to None the result is None
        '''
        field = field if is_array(field) else [field]
        table = self.get_table(table, except_=False)
        if table is not None:
            is_defined(field, 'field must be defined!')
            query = dict(table=table,
                         fields=field,
                         scalar=True,
                         sort=field,
                         limit=1,
                         descending=True,
                         date='~',
                         default_fields=False)
            last = self.find(**query)
        else:
            # no table to query
            last = None
        logger.debug("last %s.%s: %s" % (table, list2str(field), last))
        return last
Beispiel #19
0
    def upsert(self, objects, autosnap=None, batch_size=None, table=None):
        '''
        Insert objects into a table, snapshotting or replacing rows
        that already exist for the same _oid.

        :param objects: array of object dicts, or a Mapping whose values
                        are the objects; '_id' and '_oid' are read from
                        every object, '_end', '_hash' and '_start'
                        depending on the mode
        :param autosnap: True for snapshot mode, False for history
                         import mode; when None it is set to True only
                         if every object has _end of None
        :param batch_size: not used in this method
        :param table: table reference, resolved via ``get_table``
        :returns: sorted list of unicode _id values: those of the given
                  objects plus the new _id assigned to each row that
                  was rotated out in snapshot mode
        '''
        objects = objects.values() if isinstance(objects, Mapping) else objects
        is_array(objects, 'objects must be a list')
        table = self.get_table(table)
        if autosnap is None:
            # assume autosnap:True if all objects have _end:None
            # otherwise, false (all objects have _end:non-null or
            # a mix of both)
            autosnap = all(o['_end'] is None for o in objects)
            logger.warn('AUTOSNAP auto-set to: %s' % autosnap)

        # TODO remove the use of _id and _hash
        _ids = sorted(set([o['_id'] for o in objects]))
        oids = sorted(set([o['_oid'] for o in objects]))
        session = self.session_new()
        try:
            if autosnap:
                # Snapshot - relevant only for cubes which objects
                # stored always are pushed with _end:None ('current value')
                # If we already have an object with same _oid, but different
                # _hash, we know we have a NEW object state for the given _oid
                # In this case, we update the existing object by adding
                # current object's _start -> existing _end and then add
                # the current object as-is; IOW rotate out the previous
                # version by giving it a _end and insert the new version
                # as current with _end:None
                existing = session.query(table).\
                    filter(table.c._oid.in_(oids)).\
                    filter(table.c._end.is_(None)).all()
                # index the current (_end is NULL) rows by _oid
                existing = {o._oid: o for o in existing}
                # objects without a current row are inserted unchanged
                inserts = [o for o in objects if o['_oid'] not in existing]
                # snap_k starts at the number of brand-new objects and is
                # incremented for every rotated-out row below
                snap_k = len(inserts)
                dup_k = 0
                objects = [o for o in objects if o['_oid'] in existing]
                for o in objects:
                    oe = existing[o['_oid']]
                    if oe._hash != o['_hash']:
                        # close the existing row and re-key it as
                        # '<_oid>:<_start>'
                        new_id = '%s:%s' % (oe._oid, oe._start)
                        session.execute(
                            update(table).where(table.c.id == oe.id).values(
                                _end=o['_start'], _id=new_id))
                        _ids.append(new_id)
                        inserts.append(o)
                        snap_k += 1
                    else:
                        # same hash as the current row: skip it
                        dup_k += 1
                logger.debug('%s existing objects snapshotted' % snap_k)
                logger.debug('%s duplicates not re-saved' % dup_k)
                objects = inserts
            else:
                # History import
                # delete all existing versions for given _oids,
                # then we'll insert all the new historical versions
                # below
                # NOTE: THIS EXPECTS THAT THE CURRENT BATCH CONTAINS
                # ALL HISTORICAL VERSIONS OF A GIVEN _oid!
                session.query(table).filter(table.c._oid.in_(oids)).\
                    delete(synchronize_session=False)

            # insert new versions
            session.flush()
            if objects:
                session.execute(table.insert(), objects)
            session.commit()
        except Exception as e:
            # log, roll back and re-raise so the caller sees the failure
            logger.error('Session Error: %s' % e)
            session.rollback()
            raise

        return sorted(map(unicode, _ids))
Beispiel #20
0
    def upsert(self, objects, autosnap=None, batch_size=None, table=None):
        '''
        Insert objects into a table, snapshotting or replacing rows
        that already exist for the same _oid.

        :param objects: array of object dicts, or a Mapping whose values
                        are the objects; '_id' and '_oid' are read from
                        every object, '_end', '_hash' and '_start'
                        depending on the mode
        :param autosnap: True for snapshot mode, False for history
                         import mode; when None it is set to True only
                         if every object has _end of None
        :param batch_size: not used in this method
        :param table: table reference, resolved via ``get_table``
        :returns: sorted list of unicode _id values: those of the given
                  objects plus the new _id assigned to each row that
                  was rotated out in snapshot mode
        '''
        objects = objects.values() if isinstance(objects, Mapping) else objects
        is_array(objects, 'objects must be a list')
        table = self.get_table(table)
        if autosnap is None:
            # assume autosnap:True if all objects have _end:None
            # otherwise, false (all objects have _end:non-null or
            # a mix of both)
            autosnap = all(o['_end'] is None for o in objects)
            logger.warn('AUTOSNAP auto-set to: %s' % autosnap)

        # TODO remove the use of _id and _hash
        _ids = sorted(set([o['_id'] for o in objects]))
        oids = sorted(set([o['_oid'] for o in objects]))
        session = self.session_new()
        try:
            if autosnap:
                # Snapshot - relevant only for cubes which objects
                # stored always are pushed with _end:None ('current value')
                # If we already have an object with same _oid, but different
                # _hash, we know we have a NEW object state for the given _oid
                # In this case, we update the existing object by adding
                # current object's _start -> existing _end and then add
                # the current object as-is; IOW rotate out the previous
                # version by giving it a _end and insert the new version
                # as current with _end:None
                existing = session.query(table).\
                    filter(table.c._oid.in_(oids)).\
                    filter(table.c._end.is_(None)).all()
                # index the current (_end is NULL) rows by _oid
                existing = {o._oid: o for o in existing}
                # objects without a current row are inserted unchanged
                inserts = [o for o in objects if o['_oid'] not in existing]
                # snap_k starts at the number of brand-new objects and is
                # incremented for every rotated-out row below
                snap_k = len(inserts)
                dup_k = 0
                objects = [o for o in objects if o['_oid'] in existing]
                for o in objects:
                    oe = existing[o['_oid']]
                    if oe._hash != o['_hash']:
                        # close the existing row and re-key it as
                        # '<_oid>:<_start>'
                        new_id = '%s:%s' % (oe._oid, oe._start)
                        session.execute(
                            update(table).where(table.c.id == oe.id).
                            values(_end=o['_start'], _id=new_id))
                        _ids.append(new_id)
                        inserts.append(o)
                        snap_k += 1
                    else:
                        # same hash as the current row: skip it
                        dup_k += 1
                logger.debug('%s existing objects snapshotted' % snap_k)
                logger.debug('%s duplicates not re-saved' % dup_k)
                objects = inserts
            else:
                # History import
                # delete all existing versions for given _oids,
                # then we'll insert all the new historical versions
                # below
                # NOTE: THIS EXPECTS THAT THE CURRENT BATCH CONTAINS
                # ALL HISTORICAL VERSIONS OF A GIVEN _oid!
                session.query(table).filter(table.c._oid.in_(oids)).\
                    delete(synchronize_session=False)

            # insert new versions
            session.flush()
            if objects:
                session.execute(table.insert(), objects)
            session.commit()
        except Exception as e:
            # log, roll back and re-raise so the caller sees the failure
            logger.error('Session Error: %s' % e)
            session.rollback()
            raise

        return sorted(map(unicode, _ids))