class Relation:
    """Helper class providing simple relations between pairs of items.

    A relation can be strengthened or weakened by giving it a positive or
    negative 'nudge'. Every pair carries three values: ``pos`` (sum of
    positive nudges), ``neg`` (sum of negative nudges) and ``rel`` (the
    resulting relation, starting at 0.5 for a pair that was never nudged).

    Relations are stored in an SQLite table named ``relation_<name>`` and
    looked up through a cache object.

    >>> r = Relation('test')
    >>> r.get(100, 200)
    0.5
    >>> '{:2.2f}'.format(r.positive(100, 200))
    '0.71'
    >>> '{:2.2f}'.format(r.negative(100, 200))
    '0.43'
    >>> '{:2.2f}'.format(r.get(100, 200))
    '0.43'
    >>> r.get(100, 200, 'pos')
    1.0
    >>> r.get(100, 200, 'neg')
    1.0
    """

    def __init__(self, name, db=None, mix=0.5, min_cache=100, max_cache=1000,
                 commit_after=100):
        """Create a new relation object.

        name         -- suffix of the table name (``relation_<name>``).
        db           -- an sqlite3 connection; a private in-memory database
                        is created when omitted. NOTE: ``row_factory`` and
                        ``isolation_level`` of the connection are overwritten.
        mix          -- weight of the 'rolling' component of a relation; the
                        'stable' component is weighted with ``1.0 - mix``.
        min_cache,
        max_cache    -- passed on to the cache.
        commit_after -- number of saved changes after which the database is
                        committed.

        >>> db = sqlite3.connect(':memory:')
        >>> r = Relation('test', db=db, mix=0.75, min_cache=100, max_cache=200)
        >>> r.get(100, 200)
        0.5
        >>> '{:2.2f}'.format(r.positive(100, 200))
        '0.73'
        >>> del r
        >>> r = Relation('test', db=db, mix=0.75, min_cache=100, max_cache=200)
        >>> '{:2.2f}'.format(r.get(100, 200))
        '0.73'
        """
        self.__name = name

        # for caching
        self.__cache = Cache(min_cache, max_cache)

        # factors for calculating the relation
        self.__rolling = mix
        self.__stable = 1.0 - mix

        # commit changes to the database after commit_after changes
        self.__commit_after = commit_after
        self.__pending_changes = 0

        self.__tbl_name = 'relation_{}'.format(self.__name)
        if db is None:
            db = sqlite3.connect(':memory:')
        self.__db = db
        self.__db.row_factory = dict_factory
        self.__db.isolation_level = 'Immediate'
        self.__cursor = self.__db.cursor()
        self.__init_db__()

    def __init_db__(self):
        """Create the relation table unless it already exists.

        ``IF NOT EXISTS`` lets SQLite decide whether the table is present.
        Comparing the CREATE statement with the text stored in
        ``sqlite_master`` is fragile: any difference in the stored text makes
        the check fail and the following plain ``CREATE TABLE`` raise
        "table already exists".
        """
        self.__cursor.execute('''CREATE TABLE IF NOT EXISTS {} (
            keyl INTEGER,
            keyh INTEGER,
            pos REAL,
            neg REAL,
            rel REAL,
            UNIQUE(keyl, keyh))'''.format(self.__tbl_name))

    def min_cache(self, min_cache=None):
        """Get or set the minimum cache size (delegated to the cache).

        >>> r = Relation('test', min_cache=100)
        >>> r.min_cache()
        100
        >>> r.min_cache(200)
        200
        >>> r.min_cache()
        200
        """
        return self.__cache.min_cache(min_cache)

    def max_cache(self, max_cache=None):
        """Get or set the maximum cache size (delegated to the cache).

        >>> r = Relation('test', max_cache=1000)
        >>> r.max_cache()
        1000
        >>> r.max_cache(200)
        200
        >>> r.max_cache()
        200
        """
        return self.__cache.max_cache(max_cache)

    def cache_size(self):
        """Return the current size of the cache.

        >>> r = Relation('test')
        >>> r.cache_size()
        0
        >>> r.get(100, 200)
        0.5
        >>> r.cache_size()
        1
        """
        return self.__cache.cache_size()

    def commit_after(self, commit_after=None):
        """Get or set the number of changes after which the db is committed.

        Called without an argument (or with None) the current value is
        returned unchanged.

        >>> r = Relation('test', commit_after=1000)
        >>> r.commit_after()
        1000
        >>> r.commit_after(200)
        200
        >>> r.commit_after()
        200
        """
        if commit_after is not None:
            self.__commit_after = commit_after
        return self.__commit_after

    def _keys(self, key1, key2):
        """Return ``(cache_key, keyl, keyh)`` with keyl being lower than keyh.

        The cache key is the unicode string ``'<keyl>|<keyh>'``, so the
        result does not depend on the order in which the keys are passed.

        >>> r = Relation('test', commit_after=1000)
        >>> r._keys('one', 'two') == (u'one|two', 'one', 'two')
        True
        >>> r._keys('two', 'one') == (u'one|two', 'one', 'two')
        True
        >>> r._keys(1, 2) == (u'1|2', 1, 2)
        True
        >>> r._keys(2, 1) == (u'1|2', 1, 2)
        True
        """
        if key1 > key2:
            key1, key2 = key2, key1
        return u'{}|{}'.format(key1, key2), key1, key2

    def get(self, key1, key2, attr='rel'):
        """Return the value of attr ('rel', 'pos' or 'neg') between two keys.

        A pair that is not known yet is created with its default values
        (and saved) as a side effect.

        >>> r = Relation('test')
        >>> r.get(100, 200)
        0.5
        >>> r.get(100, 200, 'pos')
        0.0
        >>> r.get(100, 200, 'neg')
        0.0
        """
        key, keyl, keyh = self._keys(key1, key2)
        return self._get(key, keyl, keyh)[attr]

    def getall(self, key1):
        """Yield ``(other_key, rel)`` for every stored relation of key1.

        This is a generator. All matching rows are fetched before the first
        pair is yielded, because ``get()`` re-uses the same cursor.

        >>> r = Relation('test')
        >>> r.get(100, 200)
        0.5
        >>> r.get(100, 300)
        0.5
        >>> [(key, rel) for key, rel in r.getall(100)]
        [(200, 0.5), (300, 0.5)]
        >>> [(key, rel) for key, rel in r.getall(200)]
        [(100, 0.5)]
        """
        # key1 may be stored on either side of a pair, so look at both.
        self.__cursor.execute('''
            SELECT keyh AS key2 FROM {} WHERE keyl=?
            UNION ALL
            SELECT keyl AS key2 FROM {} WHERE keyh=?'''.format(
                self.__tbl_name, self.__tbl_name), (key1, key1,))
        for row in self.__cursor.fetchall():
            yield row['key2'], self.get(key1, row['key2'])

    def _get(self, key, keyl, keyh, save=True):
        """Internal: get the relation from the cache or load it from the db.

        On a cache miss the relation is loaded (see ``_load``) and put into
        the cache. The returned dict is the cached object itself, so
        changing it changes the cached relation.

        >>> r = Relation('test')
        >>> key, keyl, keyh = r._keys(1, 2)
        >>> sorted(r._get(key, keyl, keyh).items())
        [('keyh', 2), ('keyl', 1), ('neg', 0.0), ('pos', 0.0), ('rel', 0.5)]
        >>> r.positive(1, 2)
        0.7083333333333333
        >>> sorted(r._get(key, keyl, keyh).items())
        [('keyh', 2), ('keyl', 1), ('neg', 0.0), ('pos', 1.0), ('rel', 0.7083333333333333)]
        """
        if self.__cache.has_key(key):
            return self.__cache.get(key)
        relation = self._load(key, keyl, keyh, save)
        self.__cache.set(key, relation)
        return relation

    def _load(self, key, keyl, keyh, save=True):
        """Internal: load the relation from the db or create a new one.

        A pair without a stored row gets the defaults pos=0.0, neg=0.0 and
        rel=0.5; the new relation is written to the database only when
        ``save`` is true.

        >>> r = Relation('test')
        >>> key, keyl, keyh = r._keys(1, 2)
        >>> sorted(r._load(key, keyl, keyh).items())
        [('keyh', 2), ('keyl', 1), ('neg', 0.0), ('pos', 0.0), ('rel', 0.5)]
        """
        self.__cursor.execute(
            '''SELECT * FROM {} WHERE keyl=? AND keyh=?'''.format(
                self.__tbl_name),
            (keyl, keyh,))
        relation = self.__cursor.fetchone()
        if not relation:
            relation = {
                'keyl': keyl,
                'keyh': keyh,
                'pos': 0.0,
                'neg': 0.0,
                'rel': 0.5,
            }
            if save:
                self._save(relation)
        return relation

    def _save(self, relation):
        """Internal: write a relation to the database.

        The row for the (keyl, keyh) pair is inserted or replaced. The
        database is committed once ``commit_after`` changes have piled up.
        """
        self.__cursor.execute(
            '''INSERT OR REPLACE INTO {}
            (keyl, keyh, pos, neg, rel)
            VALUES (?, ?, ?, ?, ?)'''.format(self.__tbl_name),
            (relation['keyl'], relation['keyh'], relation['pos'],
             relation['neg'], relation['rel'],))
        self.__pending_changes += 1
        # '>=' so that the commit happens after exactly commit_after
        # changes; '>' would wait for one change more than configured.
        if self.__pending_changes >= self.__commit_after:
            self._commit()

    def _commit(self):
        """Force a commit and reset the counter of pending changes."""
        self.__db.commit()
        self.__pending_changes = 0

    def __del__(self):
        """Commit changes to the database on garbage collection etc."""
        # Best effort only: the object may be half-initialised (__init__
        # failed before the connection was stored) or the connection may
        # already be closed. Exceptions raised in __del__ cannot be handled
        # by anybody, so they are swallowed here on purpose.
        try:
            self.__db.commit()
        except Exception:
            pass

    def positive(self, key1, key2, inc=1.0):
        """Update / add a positive relation between key1 and key2.

        ``inc`` is added to the 'pos' value of the pair. Returns the new
        'rel' value.
        """
        key, keyl, keyh = self._keys(key1, key2)
        return self._update(key, keyl, keyh, inc, True)['rel']

    def negative(self, key1, key2, dec=1.0):
        """Update / add a negative relation between key1 and key2.

        ``dec`` is added to the 'neg' value of the pair. Returns the new
        'rel' value.
        """
        key, keyl, keyh = self._keys(key1, key2)
        return self._update(key, keyl, keyh, dec, False)['rel']

    def _update(self, key, keyl, keyh, diff, positive):
        """Internal: update / add a relation between keyl and keyh.

        The new 'rel' value is a weighted mix of two components:

        rolling -- the old 'rel' moved half of the way towards 1.0
                   (positive nudge) or towards 0.0 (negative nudge)
        stable  -- (pos + 1) / (pos + neg + 2), computed from the totals

        Returns the updated relation dict after saving it.
        """
        # save=False: a new relation is saved below anyway, once updated.
        r = self._get(key, keyl, keyh, False)
        if positive:
            r['pos'] += diff
            rolling = r['rel'] + (1.0 - r['rel']) / 2.0
        else:
            r['neg'] += diff
            rolling = r['rel'] - r['rel'] / 2.0
        stable = (r['pos'] + 1.0) / (r['pos'] + r['neg'] + 2.0)
        r['rel'] = rolling * self.__rolling + stable * self.__stable
        self._save(r)
        return r