class MultiRelationCls(object): c = operators.Slots() rels = rels_tmp def __init__(self, thing1, thing2, *a, **kw): r = self.rel(thing1, thing2) self.__class__ = r self.__init__(thing1, thing2, *a, **kw) @classmethod def rel(cls, thing1, thing2): t1 = thing1 if isinstance(thing1, ThingMeta) else thing1.__class__ t2 = thing2 if isinstance(thing2, ThingMeta) else thing2.__class__ return cls.rels[(t1, t2)] @classmethod def _query(cls, *rules, **kw): #TODO it should be possible to send the rules and kw to #the merge constructor queries = [r._query(*rules, **kw) for r in cls.rels.values()] if "sort" in kw: print "sorting MultiRelations is not supported" return Merge(queries) @classmethod def _fast_query(cls, sub, obj, name, data=True, eager_load=True, thing_data=False): #divide into types def type_dict(items): types = {} for i in items: types.setdefault(i.__class__, []).append(i) return types sub_dict = type_dict(tup(sub)) obj_dict = type_dict(tup(obj)) #for each pair of types, see if we have a query to send res = {} for types, rel in cls.rels.iteritems(): t1, t2 = types if sub_dict.has_key(t1) and obj_dict.has_key(t2): res.update( rel._fast_query(sub_dict[t1], obj_dict[t2], name, data=data, eager_load=eager_load, thing_data=thing_data)) return res
class DataThing(object):
    """Base class for db-backed, cached model objects.

    Splits attributes into "base props" (leading underscore, stored on the
    thing row) and "data props" (stored in ``self._t`` and written via
    ``_set_data``). Tracks modifications in ``self._dirties`` and writes
    them out in ``_commit`` under a per-thing lock, keeping a full copy of
    the object in cache.

    NOTE(review): a second ``class DataThing`` definition appears later in
    this module; at import time the later definition rebinds the name and
    shadows this one — confirm which version is intended to survive.
    """
    _base_props = ()
    _int_props = ()
    _data_int_props = ()
    _int_prop_suffix = None
    _defaults = {}
    _essentials = ()
    c = operators.Slots()
    __safe__ = False
    _cache = g.cache

    def __init__(self):
        # SafeSetAttr presumably suppresses the dirty-tracking in
        # __setattr__ while bootstrapping — TODO confirm against its impl.
        safe_set_attr = SafeSetAttr(self)
        with safe_set_attr:
            self.safe_set_attr = safe_set_attr
            self._dirties = {}
            self._t = {}
            self._created = False
            self._loaded = True

    #TODO some protection here?
    def __setattr__(self, attr, val, make_dirty=True):
        # Route writes: dunder/safe-mode attrs go straight through, "_"
        # attrs are base props on the instance, everything else lands in
        # the data-prop dict self._t. Changed values are recorded in
        # self._dirties as (old, new) pairs unless make_dirty is False.
        if attr.startswith('__') or self.__safe__:
            object.__setattr__(self, attr, val)
            return
        if attr.startswith('_'):
            #assume baseprops has the attr
            if make_dirty and hasattr(self, attr):
                old_val = getattr(self, attr)
            object.__setattr__(self, attr, val)
            if not attr in self._base_props:
                return
        else:
            old_val = self._t.get(attr, self._defaults.get(attr))
            self._t[attr] = val
        if make_dirty and val != old_val:
            self._dirties[attr] = (old_val, val)

    def __setstate__(self, state):
        # pylibmc's automatic unpicking will call __setstate__ if it exists.
        # if we don't implement __setstate__ the check for existence will fail
        # in an atypical (and not properly handled) way because we override
        # __getattr__. the implementation provided here is identical to what
        # would happen in the default unimplemented case.
        self.__dict__ = state

    def __getattr__(self, attr):
        """Resolve data props from self._t, falling back to _defaults.

        On a miss, builds a descriptive error; if the attribute is an
        "essential" on a non-deleted thing, forces a reload from the db
        before giving up with AttributeError.
        """
        try:
            return self._t[attr]
        except KeyError:
            try:
                return self._defaults[attr]
            except KeyError:
                # attr didn't exist--continue on to error recovery below
                pass

        # Use object.__getattribute__ throughout here to avoid recursing
        # back into this __getattr__ while building the error message.
        try:
            _id = object.__getattribute__(self, "_id")
        except AttributeError:
            _id = "???"

        try:
            cl = object.__getattribute__(self, "__class__").__name__
        except AttributeError:
            cl = "???"

        if self._loaded:
            nl = "it IS loaded"
        else:
            nl = "it is NOT loaded"

        try:
            id_str = "%d" % _id
        except TypeError:
            id_str = "%r" % _id

        descr = '%s(%s).%s' % (cl, id_str, attr)

        essentials = object.__getattribute__(self, "_essentials")
        deleted = object.__getattribute__(self, "_deleted")

        if deleted:
            nl += " and IS deleted."
        else:
            nl += " and is NOT deleted."

        if attr in essentials and not deleted:
            g.log.error("%s not found; %s forcing reload.", descr, nl)
            self._load()
            try:
                return self._t[attr]
            except KeyError:
                g.log.error("reload of %s didn't help.", descr)

        raise AttributeError, '%s not found; %s' % (descr, nl)

    def _cache_key(self):
        # Cache key is derived from class name + id.
        return thing_prefix(self.__class__.__name__, self._id)

    def _other_self(self):
        """Load from the cached version of myself. Skip the local cache."""
        l = self._cache.get(self._cache_key(), allow_local=False)
        if l and l._id != self._id:
            # A different thing was stored under our key; purge it.
            g.log.error("thing.py: Doppleganger on read: got %s for %s",
                        (l, self))
            self._cache.delete(self._cache_key())
            return
        return l

    def _cache_myself(self):
        """Write the full object to cache under its cache key."""
        ck = self._cache_key()
        self._cache.set(ck, self, time=THING_CACHE_TTL)

    def _sync_latest(self):
        """Load myself from the cache to and re-apply the .dirties list to
        make sure we don't overwrite a previous commit. """
        other_self = self._other_self()
        if not other_self:
            return self._dirty

        #copy in the cache's version
        for prop in self._base_props:
            self.__setattr__(prop, getattr(other_self, prop), False)
        if other_self._loaded:
            self._t = other_self._t

        #re-apply the .dirties
        old_dirties = self._dirties
        self._dirties = {}
        for k, (old_val, new_val) in old_dirties.iteritems():
            setattr(self, k, new_val)

        #return whether we're still dirty or not
        return self._dirty

    @classmethod
    def record_cache_write(cls, event, delta=1):
        # Stats hook; concrete subclasses supply the implementation.
        raise NotImplementedError

    @classmethod
    def record_lookup(cls, data, delta=1):
        # Stats hook; concrete subclasses supply the implementation.
        raise NotImplementedError

    def _commit(self, keys=None):
        """Write dirty props to the db and re-cache the object.

        If `keys` is given, only those dirty props are written (the rest
        stay dirty). Creation and modification happen inside a db
        transaction; modification is done under a per-thing lock after
        syncing against the cached copy.
        """
        lock = None
        try:
            if not self._created:
                begin()
                self._create()
                just_created = True
                self.record_cache_write(event="create")
            else:
                just_created = False

            lock = g.make_lock("thing_commit", 'commit_' + self._fullname)
            lock.acquire()

            if not just_created and not self._sync_latest():
                #sync'd and we have nothing to do now, but we still cache anyway
                self._cache_myself()
                return

            if not just_created:
                self.record_cache_write(event="modify")

            # begin is a no-op if already done, but in the not-just-created
            # case we need to do this here because the else block is not
            # executed when the try block is exited prematurely in any way
            # (including the return in the above branch)
            begin()

            to_set = self._dirties.copy()
            if keys:
                keys = tup(keys)
                # restrict the write set to the requested keys
                for key in to_set.keys():
                    if key not in keys:
                        del to_set[key]

            data_props = {}
            thing_props = {}
            for k, (old_value, new_value) in to_set.iteritems():
                if k.startswith('_'):
                    thing_props[k[1:]] = new_value
                else:
                    data_props[k] = new_value

            if data_props:
                self._set_data(self._type_id, self._id, just_created,
                               **data_props)

            if thing_props:
                self._set_props(self._type_id, self._id, **thing_props)

            if keys:
                # only the written keys stop being dirty
                for k in keys:
                    if self._dirties.has_key(k):
                        del self._dirties[k]
            else:
                self._dirties.clear()
        except:
            rollback()
            raise
        else:
            commit()
            self._cache_myself()
        finally:
            if lock:
                lock.release()

        hooks.get_hook("thing.commit").call(thing=self, changes=to_set)

    @classmethod
    def _load_multi(cls, need):
        """Load data props from the db for several things and cache them."""
        need = tup(need)
        need_ids = [n._id for n in need]
        datas = cls._get_data(cls._type_id, need_ids)
        to_save = {}
        try:
            essentials = object.__getattribute__(cls, "_essentials")
        except AttributeError:
            essentials = ()
        for i in need:
            #if there wasn't any data, keep the empty dict
            i._t.update(datas.get(i._id, i._t))
            i._loaded = True
            for attr in essentials:
                if attr not in i._t:
                    print "Warning: %s is missing %s" % (i._fullname, attr)
            to_save[i._id] = i

        prefix = thing_prefix(cls.__name__)

        #write the data to the cache
        cls._cache.set_multi(to_save, prefix=prefix, time=THING_CACHE_TTL)

    def _load(self):
        self._load_multi(self)

    def _safe_load(self):
        # Load data props only if they haven't been loaded already.
        if not self._loaded:
            self._load()

    def _incr(self, prop, amt=1):
        """Atomically increment an integer prop in db and cache.

        Only props listed in _int_props/_data_int_props (or matching
        _int_prop_suffix) may be incremented, and the thing must not have
        uncommitted changes.
        """
        if self._dirty:
            raise ValueError, "cannot incr dirty thing"

        #make sure we're incr'ing an _int_prop or _data_int_prop.
        if prop not in self._int_props:
            if (prop in self._data_int_props or
                self._int_prop_suffix and prop.endswith(self._int_prop_suffix)):
                #if we're incr'ing a data_prop, make sure we're loaded
                if not self._loaded:
                    self._load()
            else:
                msg = ("cannot incr non int prop %r on %r -- "
                       "it's not in %r or %r" %
                       (prop, self, self._int_props, self._data_int_props))
                raise ValueError, msg

        with g.make_lock("thing_commit", 'commit_' + self._fullname):
            self._sync_latest()
            old_val = getattr(self, prop)
            if self._defaults.has_key(prop) and self._defaults[prop] == old_val:
                #potential race condition if the same property gets incr'd
                #from default at the same time
                setattr(self, prop, old_val + amt)
                self._commit(prop)
            else:
                self.__setattr__(prop, old_val + amt, False)
                #db
                if prop.startswith('_'):
                    tdb.incr_thing_prop(self._type_id, self._id, prop[1:], amt)
                else:
                    self._incr_data(self._type_id, self._id, prop, amt)
                self._cache_myself()
            self.record_cache_write(event="incr")

    @property
    def _id36(self):
        # Base-36 string form of the numeric id.
        return to36(self._id)

    @class_property
    def _fullname_prefix(cls):
        # e.g. type prefix + base-36 type id; fullnames look like "t3_x".
        return cls._type_prefix + to36(cls._type_id)

    @classmethod
    def _fullname_from_id36(cls, id36):
        return cls._fullname_prefix + '_' + id36

    @property
    def _fullname(self):
        return self._fullname_from_id36(self._id36)

    #TODO error when something isn't found?
    @classmethod
    def _byID(cls, ids, data=False, return_dict=True, stale=False,
              ignore_missing=False):
        """Look up things by integer id, via cache with db fallback (sgm).

        Returns a single thing for a scalar id, otherwise a dict or list
        depending on `return_dict`. With data=True, data props are loaded
        too. Raises NotFound for missing ids unless ignore_missing.
        """
        ids, single = tup(ids, ret_is_single=True)
        prefix = thing_prefix(cls.__name__)

        # validate id range up front
        for x in ids:
            if not isinstance(x, (int, long)):
                raise ValueError('non-integer thing_id in %r' % ids)
            if x > tdb.MAX_THING_ID:
                raise NotFound('huge thing_id in %r' % ids)
            elif x < tdb.MIN_THING_ID:
                raise NotFound('negative thing_id in %r' % ids)

        if not single and not ids:
            if return_dict:
                return {}
            else:
                return []

        cls.record_lookup(data=data, delta=len(ids))

        def count_found(ret, still_need):
            # stats callback for sgm cache hit/miss accounting
            cls._cache.stats.cache_report(
                hits=len(ret), misses=len(still_need),
                cache_name='sgm.%s' % cls.__name__)

        if not cls._cache.stats:
            count_found = None

        def items_db(ids):
            # db fallback used by sgm on cache misses
            items = cls._get_item(cls._type_id, ids)

            for i in items.keys():
                items[i] = cls._build(i, items[i])

            # caching happens in sgm, but is less intrusive to count here
            cls.record_cache_write(event="cache", delta=len(items))

            return items

        bases = sgm(cls._cache, ids, items_db, prefix, time=THING_CACHE_TTL,
                    stale=stale, found_fn=count_found,
                    stat_subname=cls.__name__)

        # Check to see if we found everything we asked for
        missing = []
        for i in ids:
            if i not in bases:
                missing.append(i)
            elif bases[i] and bases[i]._id != i:
                # a thing under the wrong key: re-fetch from db and re-cache
                g.log.error(
                    "thing.py: Doppleganger on byID: %s got %s for %s" %
                    (cls.__name__, bases[i]._id, i))
                bases[i] = items_db([i]).values()[0]
                bases[i]._cache_myself()

        if missing and not ignore_missing:
            raise NotFound, '%s %s' % (cls.__name__, missing)

        for i in missing:
            ids.remove(i)

        if data:
            need = []
            for v in bases.itervalues():
                if not v._loaded:
                    need.append(v)
            if need:
                cls._load_multi(need)

        if single:
            return bases[ids[0]] if ids else None
        elif return_dict:
            return bases
        else:
            return filter(None, (bases.get(i) for i in ids))

    @classmethod
    def _byID36(cls, id36s, return_dict=True, **kw):
        """Like _byID but takes base-36 id strings."""
        id36s, single = tup(id36s, True)

        # will fail if it's not a string
        ids = [int(x, 36) for x in id36s]

        things = cls._byID(ids, return_dict=True, **kw)
        things = {thing._id36: thing for thing in things.itervalues()}

        if single:
            return things.values()[0]
        elif return_dict:
            return things
        else:
            return filter(None, (things.get(i) for i in id36s))

    @classmethod
    def _by_fullname(cls, names, return_dict=True, ignore_missing=False,
                     **kw):
        """Look up heterogeneous things/relations by fullname ("t3_x"...).

        Groups the requested fullnames by concrete type, batch-fetches
        each group via that type's _byID, then reassembles results in the
        original request order.
        """
        names, single = tup(names, True)

        table = {}
        lookup = {}

        # build id list by type
        for fullname in names:
            try:
                real_type, thing_id = fullname.split('_')
                #distinguish between things and realtions
                if real_type[0] == 't':
                    type_dict = thing_types
                elif real_type[0] == 'r':
                    type_dict = rel_types
                else:
                    raise NotFound
                real_type = type_dict[int(real_type[1:], 36)]
                thing_id = int(thing_id, 36)
                lookup[fullname] = (real_type, thing_id)
                table.setdefault(real_type, []).append(thing_id)
            except (KeyError, ValueError):
                if single:
                    raise NotFound

        # lookup ids for each type
        identified = {}
        for real_type, thing_ids in table.iteritems():
            i = real_type._byID(thing_ids, ignore_missing=ignore_missing,
                                **kw)
            identified[real_type] = i

        # interleave types in original order of the name
        res = []
        for fullname in names:
            if lookup.has_key(fullname):
                real_type, thing_id = lookup[fullname]
                thing = identified.get(real_type, {}).get(thing_id)
                if not thing and ignore_missing:
                    continue
                res.append((fullname, thing))

        if single:
            return res[0][1] if res else None
        elif return_dict:
            return dict(res)
        else:
            return [x for i, x in res]

    @property
    def _dirty(self):
        # True when there are uncommitted (old, new) entries in _dirties.
        return bool(len(self._dirties))

    @classmethod
    def _query(cls, *a, **kw):
        raise NotImplementedError()

    # The following storage hooks are provided by concrete subclasses /
    # the tdb layer; they intentionally raise here.
    @classmethod
    def _build(*a, **kw):
        raise NotImplementedError()

    def _get_data(*a, **kw):
        raise NotImplementedError()

    def _set_data(*a, **kw):
        raise NotImplementedError()

    def _incr_data(*a, **kw):
        raise NotImplementedError()

    def _get_item(*a, **kw):
        raise NotImplementedError

    def _create(self):
        """Insert the thing row and record its new id."""
        base_props = (getattr(self, prop) for prop in self._base_props)
        self._id = self._make_fn(self._type_id, *base_props)
        self._created = True
class DataThing(object):
    """Refactored base class for db-backed, cached model objects.

    NOTE(review): this is the second definition of ``DataThing`` in this
    module and rebinds the name, shadowing the earlier one — confirm this
    duplication is intentional (looks like two versions concatenated).

    Compared to the earlier version, db/cache access is factored into
    overridable hooks (get_things_from_db, write_things_to_cache, ...) and
    _commit always writes the full object to cache under a
    read-modify-write lock.
    """
    _base_props = ()
    _int_props = ()
    _data_int_props = ()
    _int_prop_suffix = None
    _defaults = {}
    _essentials = ()
    c = operators.Slots()
    __safe__ = False
    _cache = g.cache

    def __init__(self):
        # SafeSetAttr presumably suppresses the dirty-tracking in
        # __setattr__ while bootstrapping — TODO confirm against its impl.
        safe_set_attr = SafeSetAttr(self)
        with safe_set_attr:
            self.safe_set_attr = safe_set_attr
            self._dirties = {}
            self._t = {}
            self._created = False

    #TODO some protection here?
    def __setattr__(self, attr, val, make_dirty=True):
        # Route writes: dunder/safe-mode attrs go straight through, "_"
        # attrs are base props on the instance, everything else lands in
        # the data-prop dict self._t. Changed values are recorded in
        # self._dirties as (old, new) pairs unless make_dirty is False.
        if attr.startswith('__') or self.__safe__:
            object.__setattr__(self, attr, val)
            return
        if attr.startswith('_'):
            #assume baseprops has the attr
            if make_dirty and hasattr(self, attr):
                old_val = getattr(self, attr)
            object.__setattr__(self, attr, val)
            if not attr in self._base_props:
                return
        else:
            old_val = self._t.get(attr, self._defaults.get(attr))
            self._t[attr] = val
        if make_dirty and val != old_val:
            self._dirties[attr] = (old_val, val)

    def __setstate__(self, state):
        # pylibmc's automatic unpicking will call __setstate__ if it exists.
        # if we don't implement __setstate__ the check for existence will fail
        # in an atypical (and not properly handled) way because we override
        # __getattr__. the implementation provided here is identical to what
        # would happen in the default unimplemented case.
        self.__dict__ = state

    def __getattr__(self, attr):
        """Resolve data props from self._t, falling back to _defaults.

        On a miss, raises AttributeError with a descriptive message built
        via object.__getattribute__ (to avoid recursing into this hook).
        """
        try:
            return self._t[attr]
        except KeyError:
            try:
                return self._defaults[attr]
            except KeyError:
                # attr didn't exist--continue on to error recovery below
                pass

        try:
            _id = object.__getattribute__(self, "_id")
        except AttributeError:
            _id = "???"

        try:
            cl = object.__getattribute__(self, "__class__").__name__
        except AttributeError:
            cl = "???"

        try:
            id_str = "%d" % _id
        except TypeError:
            id_str = "%r" % _id

        descr = '%s(%s).%s' % (cl, id_str, attr)

        deleted = object.__getattribute__(self, "_deleted")

        if deleted:
            nl = "it IS deleted."
        else:
            nl = "it is NOT deleted."

        raise AttributeError, '%s not found; %s' % (descr, nl)

    @classmethod
    def _cache_prefix(cls):
        # Per-class cache namespace, e.g. "Link_".
        return cls.__name__ + '_'

    def _cache_key(self):
        prefix = self._cache_prefix()
        return "{prefix}{id}".format(prefix=prefix, id=self._id)

    @classmethod
    def get_things_from_db(cls, ids):
        """Read props from db and return id->thing dict."""
        raise NotImplementedError

    @classmethod
    def get_things_from_cache(cls, ids, stale=False, allow_local=True):
        """Read things from cache and return id->thing dict."""
        cache = cls._cache
        prefix = cls._cache_prefix()
        things_by_id = cache.get_multi(
            ids, prefix=prefix, stale=stale, allow_local=allow_local,
            stat_subname=cls.__name__)
        return things_by_id

    @classmethod
    def write_things_to_cache(cls, things_by_id):
        """Write id->thing dict to cache."""
        cache = cls._cache
        prefix = cls._cache_prefix()
        cache.set_multi(things_by_id, prefix=prefix, time=THING_CACHE_TTL)

    def get_read_modify_write_lock(self):
        """Return the lock to be used when doing a read-modify-write.

        When modifying a Thing we must read its current version from cache
        and update that to avoid clobbering modifications made by other
        processes after we first read the Thing.
        """
        return g.make_lock("thing_commit", 'commit_' + self._fullname)

    def write_new_thing_to_db(self):
        """Write the new thing to db and return its id."""
        raise NotImplementedError

    def write_props_to_db(self, props, data_props, brand_new_thing):
        """Write the props to db."""
        raise NotImplementedError

    def write_changes_to_db(self, changes, brand_new_thing=False):
        """Write changes to db.

        `changes` maps prop name -> (old_value, new_value); "_" prefixed
        names are base props, the rest are data props.
        """
        if not changes:
            return

        data_props = {}
        props = {}
        for prop, (old_value, new_value) in changes.iteritems():
            if prop.startswith('_'):
                props[prop[1:]] = new_value
            else:
                data_props[prop] = new_value

        self.write_props_to_db(props, data_props, brand_new_thing)

    def write_thing_to_cache(self, lock, brand_new_thing=False):
        """After modifying a thing write the entire object to cache.

        The caller must either pass in the read_modify_write lock or be
        acting for a newly created thing (that has therefore never been
        cached before).
        """
        assert brand_new_thing or lock.have_lock

        cache = self.__class__._cache
        key = self._cache_key()
        cache.set(key, self, time=THING_CACHE_TTL)

    def update_from_cache(self, lock):
        """Read the current value of thing from cache and update self.

        To be used before writing cache to avoid clobbering changes made
        by a different process. Must be called under write lock.
        """
        assert lock.have_lock

        # disallow reading from local cache because we want to pull in changes
        # made by other processes since we first read this thing.
        other_selfs = self.__class__.get_things_from_cache(
            [self._id], allow_local=False)
        if not other_selfs:
            return
        other_self = other_selfs[self._id]

        # update base_props
        for base_prop in self._base_props:
            other_self_val = getattr(other_self, base_prop)
            self.__setattr__(base_prop, other_self_val, make_dirty=False)

        # update data_props
        self._t = other_self._t

        # reapply changes made to self
        self_changes = self._dirties
        self._dirties = {}
        for data_prop, (old_val, new_val) in self_changes.iteritems():
            setattr(self, data_prop, new_val)

    @classmethod
    def record_cache_write(cls, event, delta=1):
        # Stats hook; concrete subclasses supply the implementation.
        raise NotImplementedError

    @classmethod
    def record_lookup(cls, data, delta=1):
        # Stats hook; concrete subclasses supply the implementation.
        raise NotImplementedError

    def _commit(self):
        """Write changes to db and write the full object to cache.

        When writing to postgres we write only the changes. The data in
        postgres is the canonical version. For a few reasons (speed,
        decreased load on postgres, postgres replication lag) we want to
        keep a perfectly consistent copy of the thing in cache. To achieve
        this we read the current value of the thing from cache to pull in
        any changes made by other processes, apply our changes to the
        thing, and finally set it in cache. This is done under lock to
        ensure read/write safety. If the cached thing is evicted or
        expires we must read from postgres.

        Failure cases:
        * Write to cache fails. The cache now contains stale/incorrect
          data. To ensure we recover quickly TTLs should be set as low as
          possible without overloading postgres.
        * There is long replication lag and high cache pressure. When an
          object is modified it is written to cache, but quickly evicted,
          The next lookup might read from a postgres secondary before the
          changes have been replicated there. To protect against this
          replication lag and cache pressure should be monitored and kept
          at acceptable levels.
        * Near simultaneous writes that create a logical inconsistency.
          Say request 1 and request 2 both read state 0 of a Thing.
          Request 1 changes Thing.prop from True to False and writes to
          cache and postgres. Request 2 examines the value of Thing.prop,
          sees that it is True, and due to logic in the app sets
          Thing.prop_is_true to True and writes to cache and postgres.
          Request 2 didn't clobber the change made by request 1, but it
          made a logically incorrect change--the resulting state is
          Thing.prop = False and Thing.prop_is_true = True. Logic like
          this should be identified and avoided wherever possible, or
          protected against using locks.

        """
        if not self._created:
            with TdbTransactionContext():
                _id = self.write_new_thing_to_db()
                self._id = _id
                self._created = True
                changes = self._dirties.copy()
                self.write_changes_to_db(changes, brand_new_thing=True)
                self._dirties.clear()
            # no lock needed: nothing else can have cached a brand new thing
            self.write_thing_to_cache(lock=None, brand_new_thing=True)
            self.record_cache_write(event="create")
        else:
            with self.get_read_modify_write_lock() as lock:
                self.update_from_cache(lock)
                if not self._dirty:
                    return
                with TdbTransactionContext():
                    changes = self._dirties.copy()
                    self.write_changes_to_db(changes, brand_new_thing=False)
                    self._dirties.clear()
                self.write_thing_to_cache(lock)
                self.record_cache_write(event="modify")

        hooks.get_hook("thing.commit").call(thing=self, changes=changes)

    def _incr(self, prop, amt=1):
        # Not supported in this version of the class.
        raise NotImplementedError

    @property
    def _id36(self):
        # Base-36 string form of the numeric id.
        return to36(self._id)

    @class_property
    def _fullname_prefix(cls):
        # e.g. type prefix + base-36 type id; fullnames look like "t3_x".
        return cls._type_prefix + to36(cls._type_id)

    @classmethod
    def _fullname_from_id36(cls, id36):
        return cls._fullname_prefix + '_' + id36

    @property
    def _fullname(self):
        return self._fullname_from_id36(self._id36)

    @classmethod
    def _byID(cls, ids, data=True, return_dict=True, stale=False,
              ignore_missing=False):
        """Look up things by integer id via cache, falling back to db.

        Returns a single thing for a scalar id, otherwise a dict or list
        depending on `return_dict`. Raises NotFound for missing ids unless
        ignore_missing.
        """
        # data props are ALWAYS loaded, data keyword is meaningless
        ids, single = tup(ids, ret_is_single=True)

        # validate id range up front
        for x in ids:
            if not isinstance(x, (int, long)):
                raise ValueError('non-integer thing_id in %r' % ids)
            if x > tdb.MAX_THING_ID:
                raise NotFound('huge thing_id in %r' % ids)
            elif x < tdb.MIN_THING_ID:
                raise NotFound('negative thing_id in %r' % ids)

        if not single and not ids:
            if return_dict:
                return {}
            else:
                return []

        cls.record_lookup(data=True, delta=len(ids))

        things_by_id = cls.get_things_from_cache(ids, stale=stale)
        missing_ids = [_id for _id in ids if _id not in things_by_id]

        if missing_ids:
            from_db_by_id = cls.get_things_from_db(missing_ids)
        else:
            from_db_by_id = {}

        if from_db_by_id:
            # XXX: We don't have the write lock here, so we could clobber
            # changes made by other processes
            cls.write_things_to_cache(from_db_by_id)
            cls.record_cache_write(event="cache", delta=len(from_db_by_id))
            things_by_id.update(from_db_by_id)

        # Check to see if we found everything we asked for
        missing = [_id for _id in ids if _id not in things_by_id]
        if missing and not ignore_missing:
            raise NotFound, '%s %s' % (cls.__name__, missing)
        if missing:
            ids = [_id for _id in ids if _id not in missing]

        if single:
            return things_by_id[ids[0]] if ids else None
        elif return_dict:
            return things_by_id
        else:
            return filter(None, (things_by_id.get(_id) for _id in ids))

    @classmethod
    def _byID36(cls, id36s, return_dict = True, **kw):
        """Like _byID but takes base-36 id strings."""
        id36s, single = tup(id36s, True)

        # will fail if it's not a string
        ids = [ int(x, 36) for x in id36s ]

        things = cls._byID(ids, return_dict=True, **kw)
        things = {thing._id36: thing for thing in things.itervalues()}

        if single:
            return things.values()[0]
        elif return_dict:
            return things
        else:
            return filter(None, (things.get(i) for i in id36s))

    @classmethod
    def _by_fullname(cls, names, return_dict = True, ignore_missing=False,
                     **kw):
        """Look up heterogeneous things/relations by fullname ("t3_x"...).

        Groups the requested fullnames by concrete type, batch-fetches
        each group via that type's _byID, then reassembles results in the
        original request order.
        """
        names, single = tup(names, True)

        table = {}
        lookup = {}

        # build id list by type
        for fullname in names:
            try:
                real_type, thing_id = fullname.split('_')
                #distinguish between things and realtions
                if real_type[0] == 't':
                    type_dict = thing_types
                elif real_type[0] == 'r':
                    type_dict = rel_types
                else:
                    raise NotFound
                real_type = type_dict[int(real_type[1:], 36)]
                thing_id = int(thing_id, 36)
                lookup[fullname] = (real_type, thing_id)
                table.setdefault(real_type, []).append(thing_id)
            except (KeyError, ValueError):
                if single:
                    raise NotFound

        # lookup ids for each type
        identified = {}
        for real_type, thing_ids in table.iteritems():
            i = real_type._byID(thing_ids, ignore_missing=ignore_missing,
                                **kw)
            identified[real_type] = i

        # interleave types in original order of the name
        res = []
        for fullname in names:
            if lookup.has_key(fullname):
                real_type, thing_id = lookup[fullname]
                thing = identified.get(real_type, {}).get(thing_id)
                if not thing and ignore_missing:
                    continue
                res.append((fullname, thing))

        if single:
            return res[0][1] if res else None
        elif return_dict:
            return dict(res)
        else:
            return [x for i, x in res]

    @property
    def _dirty(self):
        # True when there are uncommitted (old, new) entries in _dirties.
        return bool(len(self._dirties))

    @classmethod
    def _query(cls, *a, **kw):
        raise NotImplementedError()