def insert(self, objects, session=None, table=None):
    '''Insert a batch of objects into the given table.

    :param objects: list of objects (or a Mapping whose values are
                    the objects) to insert
    :param session: optional session passed through to the transaction
    :param table: table (or table name) to insert into; resolved via
                  self.get_table()
    '''
    objects = objects.values() if isinstance(objects, Mapping) else objects
    is_array(objects, 'objects must be a list')
    table = self.get_table(table)

    def _insert():
        # single shared code path; the original duplicated this call
        # verbatim in both branches below
        self._exec_transaction(cmd=table.insert(), params=objects,
                               session=session)

    if self._lock_required:
        # NOTE(review): _sqlite_path suggests a sqlite backend where
        # writers must be serialized via a file lock -- confirm
        with LockFile(self._sqlite_path):
            _insert()
    else:
        _insert()
def _delta_force(self, force=None, last_update=None):
    '''Resolve which oids should be (re)extracted for this run.

    :param force: False/None for a normal delta run, True to force a
                  full refresh, a list of oids for an explicit subset,
                  or a single oid
    :param last_update: timestamp override used for mtime-based deltas
    :returns: (sorted unique oids, save_delta_ts flag)
    '''
    force = force or self.lconfig.get('force') or False
    container = self.container
    save_delta_ts = False
    delta_oids = []
    if is_array(force):
        # caller handed us an explicit list of oids
        delta_oids = list(force)
    elif not force:
        # normal delta run: pick up new and/or changed oids
        save_delta_ts = True
        if self.lconfig.get('delta_new_ids', True):
            # get all new (unknown) oids
            delta_oids.extend(self.get_new_oids())
        if self.lconfig.get('delta_mtime', False):
            # get only those oids that have changed since last update
            last_update = self._fetch_mtime(last_update)
            delta_oids.extend(self.get_changed_oids(last_update))
    elif force is True or not container.exists():
        # full refresh: grab every known object id from the source
        save_delta_ts = True
        delta_oids = self.sql_get_oids()
    else:
        # a single oid was passed in
        delta_oids = [force]
    logger.debug("Delta Size: %s" % len(delta_oids))
    return sorted(set(delta_oids)), save_delta_ts
def _generate_sql(self, _oids=None, sort=True):
    '''Build the SELECT statement that extracts this cube's fields.

    :param _oids: optional list of oids to restrict the query to
    :param sort: when True, append ORDER BY the _oid column ascending
    :raises ValueError: if db, table or _oid are not configured
    :returns: the assembled, whitespace-normalized SQL string
    '''
    db = self.lconfig.get('db_schema_name') or self.lconfig.get('db')
    _oid = self.lconfig.get('_oid')
    if is_array(_oid):
        _oid = _oid[0]  # get the db column, not the field alias
    table = self.lconfig.get('table')
    if not all((_oid, table, db)):
        raise ValueError("Must define db, table, _oid in config!")
    selects = []
    stmts = []
    for as_field, opts in self.fields.iteritems():
        select = opts.get('select')
        if not select:
            # not a SQL based field
            continue
        selects.append('%s as %s' % (select, as_field))
        # raw string (r'\s+') fixes the invalid '\s' escape in the
        # original non-raw pattern
        sql = re.sub(r'\s+', ' ', opts.get('sql') or '')
        if sql:
            stmts.append(sql)
    selects = ', '.join(selects)
    stmts = ' '.join(stmts)
    sql = 'SELECT %s FROM %s.%s %s' % (selects, db, table, stmts)
    if _oids:
        # NOTE(review): oids are interpolated directly, not bound as
        # parameters; assumes trusted numeric/string ids -- confirm
        sql += ' WHERE %s.%s in (%s)' % (table, _oid,
                                         ','.join(map(str, _oids)))
    if sort:
        sql += " ORDER BY %s.%s ASC" % (table, _oid)
    return re.sub(r'\s+', ' ', sql)
def _generate_sql(self, _oids=None, sort=True):
    '''Build the SELECT statement that extracts this cube's fields.

    :param _oids: optional list of oids to restrict the query to
    :param sort: when True, append ORDER BY the _oid column ascending
    :raises ValueError: if db, table or _oid are not configured
    :returns: the assembled, whitespace-normalized SQL string
    '''
    db = self.lconfig.get('db_schema_name') or self.lconfig.get('db')
    _oid = self.lconfig.get('_oid')
    if is_array(_oid):
        _oid = _oid[0]  # get the db column, not the field alias
    table = self.lconfig.get('table')
    if not all((_oid, table, db)):
        raise ValueError("Must define db, table, _oid in config!")
    selects = []
    stmts = []
    for as_field, opts in self.fields.iteritems():
        select = opts.get('select')
        if not select:
            # not a SQL based field
            continue
        selects.append('%s as %s' % (select, as_field))
        # raw string (r'\s+') fixes the invalid '\s' escape in the
        # original non-raw pattern
        sql = re.sub(r'\s+', ' ', opts.get('sql') or '')
        if sql:
            stmts.append(sql)
    selects = ', '.join(selects)
    stmts = ' '.join(stmts)
    sql = 'SELECT %s FROM %s.%s %s' % (selects, db, table, stmts)
    if _oids:
        # NOTE(review): oids are interpolated directly, not bound as
        # parameters; assumes trusted numeric/string ids -- confirm
        sql += ' WHERE %s.%s in (%s)' % (table, _oid,
                                         ','.join(map(str, _oids)))
    if sort:
        sql += " ORDER BY %s.%s ASC" % (table, _oid)
    return re.sub(r'\s+', ' ', sql)
def _prep_objects(self, objects):
    '''Map each object's configured source id field onto '_oid'.

    Mutates the objects in place and returns the same list.
    '''
    oid_field = self.lconfig.get('_oid')
    if is_array(oid_field):
        oid_field = oid_field[1]  # get the field name, not the actual db column
    for obj in objects:
        # map _oid; in-place mutation, so no write-back needed
        obj['_oid'] = obj[oid_field]
    return objects
def _update(self, objects):
    '''Add a collection of objects to this container via self.add().

    :param objects: None (no-op), a list/tuple of objects, or another
                    MetriqueContainer
    :raises ValueError: for any other input type
    '''
    if is_null(objects):
        pass
    elif is_array(objects, except_=False):
        # plain loops here: the original used list comprehensions purely
        # for their side effects, building throwaway result lists
        for obj in tuple(objects):
            self.add(obj)
    elif isinstance(objects, MetriqueContainer):
        for obj in objects.itervalues():
            self.add(obj)
    else:
        # NOTE(review): the message mentions dict, but a bare dict is only
        # accepted if is_array() treats mappings as arrays -- confirm
        raise ValueError(
            "objs must be None, a list, tuple, dict or MetriqueContainer")
def sql_get_oids(self, where=None):
    '''
    Query source database for a distinct list of oids.

    :param where: optional WHERE clause string, or a list of clauses
                  which are OR'd together
    :returns: sorted list of distinct oid values
    '''
    table = self.lconfig.get('table')
    db = self.lconfig.get('db_schema_name') or self.lconfig.get('db')
    _oid = self.lconfig.get('_oid')
    if is_array(_oid):
        _oid = _oid[0]  # the db column, not the field alias
    sql = 'SELECT DISTINCT %s.%s FROM %s.%s' % (table, _oid, db, table)
    if where:
        if isinstance(where, basestring):
            clauses = [where]
        else:
            clauses = list(where)
        sql += ' WHERE %s' % ' OR '.join(clauses)
    return sorted(row[_oid] for row in self._load_sql(sql))
def get_last_field(self, field, table=None):
    '''Shortcut for querying to get the last field value for
    a given owner, cube.

    :param field: field name (or list of field names) to query
    :param table: table (or name) to query; None if it can't be resolved
    :returns: the last stored value, or None if the table is missing
    '''
    if not is_array(field):
        field = [field]
    table = self.get_table(table, except_=False)
    if table is None:
        last = None
    else:
        is_defined(field, 'field must be defined!')
        # newest value first: sort on the field, descending, limit 1
        last = self.find(table=table, fields=field, scalar=True,
                         sort=field, limit=1, descending=True,
                         date='~', default_fields=False)
    logger.debug("last %s.%s: %s" % (table, list2str(field), last))
    return last
def _index_default_name(self, columns, name=None):
    '''Derive a default index name ("ix_<table>_<columns>").

    :param columns: a single column name or an array of column names
    :param name: explicit index name to use instead of deriving one
    :raises ValueError: if columns is neither a string nor an array
    :returns: index name, always prefixed with exactly one 'ix_'
    '''
    table = self.config.get('table')
    is_defined(table, 'table must be defined!')
    if name:
        ix = name
    elif isinstance(columns, basestring):
        ix = '%s_%s' % (table, columns)
    elif is_array(columns, except_=False):
        ix = '%s_%s' % (table, '_'.join(tuple(columns)))
    else:
        raise ValueError("unable to get default name from columns: %s" %
                         columns)
    # strip any existing ix_ prefix, then prefix ix_ to all index names
    ix = re.sub('^ix_', '', ix)
    return 'ix_%s' % ix
def _index_default_name(self, columns, name=None):
    '''Derive a default index name ("ix_<table>_<columns>").

    :param columns: a single column name or an array of column names
    :param name: explicit index name to use instead of deriving one
    :raises ValueError: if columns is neither a string nor an array
    :returns: index name, always prefixed with exactly one 'ix_'
    '''
    table = self.config.get('table')
    is_defined(table, 'table must be defined!')
    if name:
        ix = name
    elif isinstance(columns, basestring):
        ix = '%s_%s' % (table, columns)
    elif is_array(columns, except_=False):
        ix = '%s_%s' % (table, '_'.join(tuple(columns)))
    else:
        raise ValueError(
            "unable to get default name from columns: %s" % columns)
    # strip any existing ix_ prefix, then prefix ix_ to all index names
    ix = re.sub('^ix_', '', ix)
    return 'ix_%s' % ix
def get_new_oids(self):
    '''
    Returns a list of unique oids that have not been extracted yet.

    Essentially, a diff of distinct oids in the source database
    compared to cube.
    '''
    table = self.lconfig.get('table')
    _oid = self.lconfig.get('_oid')
    if is_array(_oid):
        _oid = _oid[0]  # the db column, not the field alias
    last_id = self.container.get_last_field(field='_oid')
    if not last_id:
        return []
    try:
        # try a numeric comparison first; on failure assume the ids
        # are unicode values and quote them
        where = "%s.%s > %s" % (table, _oid, float(last_id))
    except (TypeError, ValueError):
        where = "%s.%s > '%s'" % (table, _oid, last_id)
    return self.sql_get_oids(where)
def upsert(self, objects, autosnap=None, batch_size=None, table=None):
    '''Insert or update a batch of versioned objects.

    In snapshot mode (autosnap), objects arrive with _end:None as the
    "current value"; an existing current row with a differing _hash is
    rotated out (given an _end and a new _id) before the new version is
    inserted. Otherwise, all stored versions for the incoming _oids are
    deleted and the batch is inserted as the complete history.

    :param objects: list of objects (or a Mapping whose values are the
                    objects) to save
    :param autosnap: True for snapshot mode, False for history import,
                     None to auto-detect from the objects' _end values
    :param batch_size: not referenced in this body
    :param table: table (or name) to save into
    :returns: sorted list of saved object _ids, as unicode
    '''
    objects = objects.values() if isinstance(objects, Mapping) else objects
    is_array(objects, 'objects must be a list')
    table = self.get_table(table)
    if autosnap is None:
        # assume autosnap:True if all objects have _end:None
        # otherwise, false (all objects have _end:non-null or
        # a mix of both)
        autosnap = all(o['_end'] is None for o in objects)
        logger.warn('AUTOSNAP auto-set to: %s' % autosnap)
    # TODO remove the use of _id and _hash
    _ids = sorted(set([o['_id'] for o in objects]))
    oids = sorted(set([o['_oid'] for o in objects]))
    session = self.session_new()
    try:
        if autosnap:
            # Snapshot - relevant only for cubes which objects
            # stored always are pushed with _end:None ('current value')
            # If we already have an object with same _oid, but different
            # _hash, we know we have a NEW object state for the given _oid
            # In this case, we update the existing object by adding
            # current object's _start -> existing _end and then add
            # the current object as=is; IOW rotate out the previous
            # version by giving it a _end and insert the new version
            # as current with _end:None
            existing = session.query(table).\
                filter(table.c._oid.in_(oids)).\
                filter(table.c._end.is_(None)).all()
            existing = {o._oid: o for o in existing}
            # objects whose _oid is unknown are plain inserts
            inserts = [o for o in objects if o['_oid'] not in existing]
            snap_k = len(inserts)
            dup_k = 0
            objects = [o for o in objects if o['_oid'] in existing]
            for o in objects:
                oe = existing[o['_oid']]
                if oe._hash != o['_hash']:
                    # state changed: close out the old current version
                    new_id = '%s:%s' % (oe._oid, oe._start)
                    session.execute(
                        update(table).where(table.c.id == oe.id).values(
                            _end=o['_start'], _id=new_id))
                    _ids.append(new_id)
                    inserts.append(o)
                    snap_k += 1
                else:
                    # identical _hash: nothing new to save
                    dup_k += 1
            logger.debug('%s existing objects snapshotted' % snap_k)
            logger.debug('%s duplicates not re-saved' % dup_k)
            objects = inserts
        else:
            # History import
            # delete all existing versions for given _oids,
            # then we'll insert all the new historical versions
            # below
            # NOTE: THIS EXPECTS THAT THE CURRENT BATCH CONTAINS
            # ALL HISTORICAL VERSIONS OF A GIVEN _oid!
            session.query(table).filter(table.c._oid.in_(oids)).\
                delete(synchronize_session=False)
        # insert new versions
        session.flush()
        if objects:
            session.execute(table.insert(), objects)
        session.commit()
    except Exception as e:
        logger.error('Session Error: %s' % e)
        session.rollback()
        raise
    return sorted(map(unicode, _ids))
def upsert(self, objects, autosnap=None, batch_size=None, table=None):
    '''Insert or update a batch of versioned objects.

    In snapshot mode (autosnap), objects arrive with _end:None as the
    "current value"; an existing current row with a differing _hash is
    rotated out (given an _end and a new _id) before the new version is
    inserted. Otherwise, all stored versions for the incoming _oids are
    deleted and the batch is inserted as the complete history.

    :param objects: list of objects (or a Mapping whose values are the
                    objects) to save
    :param autosnap: True for snapshot mode, False for history import,
                     None to auto-detect from the objects' _end values
    :param batch_size: not referenced in this body
    :param table: table (or name) to save into
    :returns: sorted list of saved object _ids, as unicode
    '''
    objects = objects.values() if isinstance(objects, Mapping) else objects
    is_array(objects, 'objects must be a list')
    table = self.get_table(table)
    if autosnap is None:
        # assume autosnap:True if all objects have _end:None
        # otherwise, false (all objects have _end:non-null or
        # a mix of both)
        autosnap = all(o['_end'] is None for o in objects)
        logger.warn('AUTOSNAP auto-set to: %s' % autosnap)
    # TODO remove the use of _id and _hash
    _ids = sorted(set([o['_id'] for o in objects]))
    oids = sorted(set([o['_oid'] for o in objects]))
    session = self.session_new()
    try:
        if autosnap:
            # Snapshot - relevant only for cubes which objects
            # stored always are pushed with _end:None ('current value')
            # If we already have an object with same _oid, but different
            # _hash, we know we have a NEW object state for the given _oid
            # In this case, we update the existing object by adding
            # current object's _start -> existing _end and then add
            # the current object as=is; IOW rotate out the previous
            # version by giving it a _end and insert the new version
            # as current with _end:None
            existing = session.query(table).\
                filter(table.c._oid.in_(oids)).\
                filter(table.c._end.is_(None)).all()
            existing = {o._oid: o for o in existing}
            # objects whose _oid is unknown are plain inserts
            inserts = [o for o in objects if o['_oid'] not in existing]
            snap_k = len(inserts)
            dup_k = 0
            objects = [o for o in objects if o['_oid'] in existing]
            for o in objects:
                oe = existing[o['_oid']]
                if oe._hash != o['_hash']:
                    # state changed: close out the old current version
                    new_id = '%s:%s' % (oe._oid, oe._start)
                    session.execute(
                        update(table).where(table.c.id == oe.id).
                        values(_end=o['_start'], _id=new_id))
                    _ids.append(new_id)
                    inserts.append(o)
                    snap_k += 1
                else:
                    # identical _hash: nothing new to save
                    dup_k += 1
            logger.debug('%s existing objects snapshotted' % snap_k)
            logger.debug('%s duplicates not re-saved' % dup_k)
            objects = inserts
        else:
            # History import
            # delete all existing versions for given _oids,
            # then we'll insert all the new historical versions
            # below
            # NOTE: THIS EXPECTS THAT THE CURRENT BATCH CONTAINS
            # ALL HISTORICAL VERSIONS OF A GIVEN _oid!
            session.query(table).filter(table.c._oid.in_(oids)).\
                delete(synchronize_session=False)
        # insert new versions
        session.flush()
        if objects:
            session.execute(table.insert(), objects)
        session.commit()
    except Exception as e:
        logger.error('Session Error: %s' % e)
        session.rollback()
        raise
    return sorted(map(unicode, _ids))