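# Module-level imports needed by the database implementations below (Python 2,
# hence cPickle).
import threading
import shelve
import sqlite3
import cPickle

# FakeLogView and DatabaseConnection are defined elsewhere in the project; the
# stand-ins below are only a minimal sketch inferred from how this module uses
# them (write(), curs(), commit(), acquire()/release(), close()). They are
# assumptions, not the project's real implementations; the database file name
# used here is a placeholder.

class FakeLogView():
    """Assumed fallback logger: prints messages prefixed with the view title."""

    def __init__(self, title):
        self._title = title

    def write(self, message):
        print(self._title + ': ' + message)


class DatabaseConnection():
    """Assumed sqlite3 wrapper guarded by a re-entrant lock.

    curs() returns one shared cursor, because callers execute() and fetch on
    separate curs() calls and expect to see the same result set.
    """

    def __init__(self, pathRoot, version):
        self._lock = threading.RLock()
        # check_same_thread=False: callers may share this connection across threads
        self._db = sqlite3.connect(pathRoot + '/database.' + version + '.sqlite',
                                   check_same_thread=False)
        self._curs = self._db.cursor()

    def acquire(self):
        self._lock.acquire()

    def release(self):
        self._lock.release()

    def curs(self):
        return self._curs

    def commit(self):
        self._db.commit()

    def close(self):
        self._db.close()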
class DatabaseByShelfing():
    """Word index backed by two shelve files.

    _files maps str(index) -> (path, timestamp); _words maps word -> {fileIndex: set(positions)}.
    """

    version = '0.0.3'

    def __init__(self, pathRoot, logger=None):
        self._lock = None
        self._words = None
        self._files = None
        self._pathRoot = pathRoot
        if logger == None:
            self._logger = FakeLogView('Database Log')
        else:
            self._logger = logger
        self._lock = threading.RLock()
        self._lock.acquire()
        try:
            self._files = shelve.open(self._pathRoot + '/shelve_files.' + self.version, flag='c')
            self._words = shelve.open(self._pathRoot + '/shelve_words.' + self.version, flag='c')
            self._logger.write('Shelve DB loaded file cache...' + str(len(self._files)) + ' files.')
            self._logger.write('Shelve DB loaded word cache...' + str(len(self._words)) + ' words.')
        finally:
            self._lock.release()

    def file_position_get_for_word(self, word):
        self._lock.acquire()
        try:
            return self._words[word].items()
        finally:
            self._lock.release()

    def words_get_all(self):
        self._lock.acquire()
        try:
            return self._words.keys()
        finally:
            self._lock.release()

    def file_get(self, index):
        self._lock.acquire()
        try:
            return self._files[str(index)]
        finally:
            self._lock.release()

    def count_of_words(self):
        self._lock.acquire()
        try:
            return len(self._words)
        finally:
            self._lock.release()

    def count_of_files(self):
        self._lock.acquire()
        try:
            return len(self._files)
        finally:
            self._lock.release()

    def add_new_file(self, path):
        self._lock.acquire()
        try:
            skip = False
            for p, m in self._files.values():
                if p == path:
                    skip = True
                    break
            if not skip:
                x = str(len(self._files))
                self._files[x] = (path, 0)
            return not skip
        finally:
            self._lock.release()

    def update_words_in_file(self, words, indexOfFile, newModifiedTimestamp):
        self._lock.acquire()
        try:
            # delete old references to this file
            for key in self._words.keys():
                temp = self._words[key]
                if indexOfFile in temp:
                    del temp[indexOfFile]
                    self._words[key] = temp
            # update with new words in file; shelve requires writing the value back
            for pos, word in words:
                try:
                    if not word in self._words:
                        temp = dict()
                        temp[indexOfFile] = set([pos])
                        self._words[word] = temp
                    elif not indexOfFile in self._words[word]:
                        temp = self._words[word]
                        temp[indexOfFile] = set([pos])
                        self._words[word] = temp
                    else:
                        temp = self._words[word]
                        temp[indexOfFile].add(pos)
                        self._words[word] = temp
                except Exception as e:
                    print('ERROR: word=' + word)
                    raise
            x = str(indexOfFile)
            path, timestamp = self._files[x]
            self._files[x] = (path, newModifiedTimestamp)
        finally:
            self._lock.release()

    def clear(self):
        self._lock.acquire()
        try:
            self._files.close()
            self._words.close()
            # reopen with flag='n' to truncate both shelves
            self._files = shelve.open(self._pathRoot + '/shelve_files.' + self.version, flag='n')
            self._words = shelve.open(self._pathRoot + '/shelve_words.' + self.version, flag='n')
        finally:
            self._lock.release()

    def flush(self):
        self._logger.write('Saving db...\n')
        self._lock.acquire()
        try:
            self._files.sync()
            self._words.sync()
        except Exception as e:
            self._logger.write(str(e))
        finally:
            self._lock.release()

    def close(self):
        self._files.close()
        self._words.close()
class Database():
    """In-memory word index persisted to disk with cPickle on flush().

    Same data layout as DatabaseByShelfing, but held in plain dicts.
    """

    version = '0.0.3'

    def __init__(self, pathRoot, logger=None):
        self._pathRoot = ''
        self._lock = None
        self._files = []
        self._words = dict()
        self._dirty = False
        if logger == None:
            self._logger = FakeLogView('Database Log')
        else:
            self._logger = logger
        self._pathRoot = pathRoot
        self._lock = threading.RLock()
        self._lock.acquire()
        # init databases from the pickled caches, if they exist
        try:
            with open(self._pathRoot + '/database_words' + self.version + '.cPickle', 'rb') as cache:
                self._words = cPickle.load(cache)
            self._logger.write('loaded word cache...' + str(len(self._words)) + ' words.')
        except Exception as e:
            self._words = None
            self._logger.write(str(e))
        try:
            with open(self._pathRoot + '/database_files' + self.version + '.cPickle', 'rb') as cache:
                self._files = cPickle.load(cache)
            self._logger.write('loaded file cache...' + str(len(self._files)) + ' files.')
        except Exception as e:
            self._files = None
            self._logger.write(str(e))
        if self._words == None or self._files == None:
            self._words = dict()
            self._logger.write('created word cache!')
            self._files = dict()
            self._logger.write('created file cache!')
            self._dirty = True
        self._lock.release()

    def file_position_get_for_word(self, word):
        self._lock.acquire()
        try:
            return self._words[word].items()
        finally:
            self._lock.release()

    def words_get_all(self):
        self._lock.acquire()
        try:
            return self._words.keys()
        finally:
            self._lock.release()

    def file_get(self, index):
        self._lock.acquire()
        try:
            return self._files[str(index)]
        finally:
            self._lock.release()

    def count_of_words(self):
        self._lock.acquire()
        try:
            return len(self._words)
        finally:
            self._lock.release()

    def count_of_files(self):
        self._lock.acquire()
        try:
            return len(self._files)
        finally:
            self._lock.release()

    def add_new_file(self, path):
        self._lock.acquire()
        try:
            skip = False
            for key, value in self._files.items():
                if value[0] == path:
                    skip = True
                    break
            if not skip:
                x = str(len(self._files))
                self._files[x] = (path, 0)
                self._dirty = True
            return not skip
        finally:
            self._lock.release()

    def update_words_in_file(self, words, indexOfFile, newModifiedTimestamp):
        self._lock.acquire()
        try:
            # delete old references to this file
            for key in self._words:
                self._words[key].pop(indexOfFile, None)
            # update with new words in file
            for pos, word in words:
                if not word in self._words:
                    self._words[word] = {indexOfFile: set([pos])}
                elif not indexOfFile in self._words[word]:
                    self._words[word][indexOfFile] = set([pos])
                else:
                    self._words[word][indexOfFile].add(pos)
            x = str(indexOfFile)
            path, timestamp = self._files[x]
            self._files[x] = (path, newModifiedTimestamp)
        finally:
            self._lock.release()
        self._dirty = True

    def clear(self):
        self._lock.acquire()
        try:
            self._words = dict()
            self._files = dict()
        finally:
            self._lock.release()

    def flush(self):
        if not self._dirty:
            self._logger.write('Db not dirty...')
            return
        else:
            self._logger.write('Saving db...')
        self._lock.acquire()
        try:
            with open(self._pathRoot + '/database_files' + self.version + '.cPickle', 'wb') as cache:
                cPickle.dump(self._files, cache)
            self._logger.write(str(len(self._files)) + ' files pickled...')
            with open(self._pathRoot + '/database_words' + self.version + '.cPickle', 'wb') as cache:
                cPickle.dump(self._words, cache)
            self._logger.write(str(len(self._words)) + ' words pickled...')
            self._dirty = False
        except Exception as e:
            self._logger.write(str(e))
        finally:
            self._lock.release()

    def close(self):
        self.flush()
class DatabaseBySQLite():
    """Word index backed by SQLite, using files, words and positions tables."""

    version = '0.0.4'

    def __init__(self, pathRoot, logger=None):
        self._connection = DatabaseConnection(pathRoot, self.version)
        if logger == None:
            self._logger = FakeLogView('Database Log')
        else:
            self._logger = logger
        self._connection.acquire()
        try:
            self._connection.curs().execute('''CREATE TABLE IF NOT EXISTS "files"
                ("id" INTEGER PRIMARY KEY NOT NULL, "path" TEXT NOT NULL, "timestamp" INTEGER NOT NULL)''')
            self._connection.curs().execute('''CREATE TABLE IF NOT EXISTS "words"
                ("id" INTEGER PRIMARY KEY NOT NULL, "word" TEXT)''')
            self._connection.curs().execute('''CREATE TABLE IF NOT EXISTS "positions"
                ("id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                 "id_file" INTEGER NOT NULL, "id_word" INTEGER NOT NULL, "position" INTEGER NOT NULL,
                 FOREIGN KEY(id_file) REFERENCES files(id),
                 FOREIGN KEY(id_word) REFERENCES words(id))''')
            self._connection.commit()
            self._logger.write('loaded sqlite cache...')
            self._logger.write(str(self.count_of_files(False)) + ' files...')
            self._logger.write(str(self.count_of_words(False)) + ' words...')
        finally:
            self._connection.release()

    def file_position_get_for_word(self, word):
        self._connection.acquire()
        try:
            results = dict()
            self._connection.curs().execute('''SELECT path, position FROM positions
                INNER JOIN words ON positions.id_word = words.id
                INNER JOIN files ON positions.id_file = files.id
                WHERE words.word = ? ORDER BY path ASC''', (word,))
            for path, position in self._connection.curs().fetchall():
                if path in results:
                    results[path].update([position])
                else:
                    results[path] = set([position])
            return results.items()
        finally:
            self._connection.release()

    def words_get_all(self):
        self._connection.acquire()
        try:
            self._connection.curs().execute('''SELECT word FROM words ORDER BY id ASC''')
            return map(lambda result: result[0], self._connection.curs().fetchall())
        finally:
            self._connection.release()

    def files_get_all(self):
        self._connection.acquire()
        try:
            self._connection.curs().execute('''SELECT path, timestamp FROM files ORDER BY id ASC''')
            return self._connection.curs().fetchall()
        finally:
            self._connection.release()

    def count_of_words(self, cached=True):
        if cached:
            return self._cached_count_of_words
        else:
            self._connection.acquire()
            try:
                self._connection.curs().execute("SELECT COUNT(id) FROM words")
                self._cached_count_of_words = self._connection.curs().fetchone()[0]
                return self._cached_count_of_words
            finally:
                self._connection.release()

    def count_of_files(self, cached=True):
        if cached:
            return self._cached_count_of_files
        else:
            self._connection.acquire()
            try:
                self._connection.curs().execute("SELECT COUNT(id) FROM files")
                self._cached_count_of_files = self._connection.curs().fetchone()[0]
                return self._cached_count_of_files
            finally:
                self._connection.release()

    def add_new_file(self, path):
        self._connection.acquire()
        try:
            self._connection.curs().execute('''SELECT COUNT(id) FROM files WHERE path = ?''', (path,))
            count = self._connection.curs().fetchone()[0]
            if count == 1:
                return False
            elif count == 0:
                count = self.count_of_files()
                self._connection.curs().execute('''INSERT INTO files VALUES(?, ?, 0)''', (count, path))
                self._connection.commit()
                self._cached_count_of_files += 1
                return True
            else:
                raise Exception('Found multiple files with the same path!')
        finally:
            self._connection.release()

    def update_words_in_file(self, words, path, newModifiedTimestamp):
        self._connection.acquire()
        try:
            # find the file id, or add the file if it doesn't exist
            self._connection.curs().execute('''SELECT id FROM files WHERE path = ?''', (path,))
            id_file = self._connection.curs().fetchone()
            if id_file == None:
                self.add_new_file(path)
                self._connection.curs().execute('''SELECT MAX(id) FROM files''')
                id_file = self._connection.curs().fetchone()[0]
            else:
                id_file = id_file[0]
            # delete old references to this file
            self._connection.curs().execute('''DELETE FROM positions WHERE id_file = ?''', (id_file,))
            params = []
            # update with new words in file
            for pos, word in words:
                self._connection.curs().execute('''SELECT id FROM words WHERE word = ?''', (word,))
                id_word = self._connection.curs().fetchone()
                if id_word == None:
                    id_word = self.count_of_words()
                    self._connection.curs().execute('''INSERT INTO words VALUES(?, ?)''', (id_word, word))
                    self._cached_count_of_words += 1
                else:
                    id_word = id_word[0]
                params.append((id_file, id_word, pos))
            self._connection.curs().executemany(
                '''INSERT INTO positions(id_file, id_word, position) VALUES(?, ?, ?)''', params)
            self._connection.curs().execute(
                '''UPDATE files SET timestamp = ? WHERE id = ?''', (newModifiedTimestamp, id_file))
            self._connection.commit()
        finally:
            self._connection.release()

    def clear(self):
        self._connection.acquire()
        try:
            self._connection.curs().executescript(
                '''DELETE FROM positions; DELETE FROM files; DELETE FROM words;''')
            self._connection.commit()
            assert self.count_of_files(False) == 0, 'Files could not be deleted!'
            assert self.count_of_words(False) == 0, 'Words could not be deleted!'
        finally:
            self._connection.release()

    def flush(self):
        pass

    def close(self):
        self._connection.close()
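
# A minimal usage sketch of the shared interface. Assumptions: '/tmp/index_cache'
# is a placeholder directory that must already exist, and words are supplied as
# (position, word) tuples, which is what update_words_in_file consumes. Note the
# SQLite backend identifies a file by its path, while Database and
# DatabaseByShelfing use an integer file index instead.
if __name__ == '__main__':
    db = DatabaseBySQLite('/tmp/index_cache')
    db.add_new_file('/tmp/example.txt')
    # index two words of the file and record its new modification timestamp
    db.update_words_in_file([(0, 'hello'), (6, 'world')], '/tmp/example.txt', 1234567890)
    # query: which files contain 'hello', and at which positions?
    for path, positions in db.file_position_get_for_word('hello'):
        print(path + ' -> ' + str(sorted(positions)))
    db.flush()
    db.close()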