def __init__(self, pathRoot, logger=None):
		"""Open the SQLite-backed index and make sure its schema exists.

		pathRoot -- handed unchanged to DatabaseConnection together with
		self.version.
		logger -- object exposing write(text); when omitted a FakeLogView
		named 'Database Log' is created.

		Creates the "files", "words" and "positions" tables if they are
		missing, commits, then logs the current file and word counts.
		"""

		self._connection = DatabaseConnection(pathRoot, self.version)

		# Identity test: a logger that overrides __eq__ must not be mistaken for None.
		if logger is None:
			self._logger = FakeLogView('Database Log')
		else:
			self._logger = logger

		self._connection.acquire()
		try:

			# The other methods of this class already rely on curs() handing back
			# the same cursor between execute() and fetch*(), so fetch it once.
			cursor = self._connection.curs()

			cursor.execute('''CREATE TABLE IF NOT EXISTS "files"
				("id" INTEGER PRIMARY KEY NOT NULL , "path" TEXT NOT NULL, "timestamp" INTEGER NOT NULL)''')
			cursor.execute('''CREATE  TABLE  IF NOT EXISTS "words"
				("id" INTEGER PRIMARY KEY NOT NULL , "word" TEXT)''')
			cursor.execute('''CREATE  TABLE  IF NOT EXISTS "positions"
				("id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
				 	"id_file" INTEGER NOT NULL,
					"id_word" INTEGER NOT NULL,
					"position" INTEGER NOT NULL,
					FOREIGN KEY(id_file) REFERENCES files(id),
					FOREIGN KEY(id_word) REFERENCES words(id))''')
			self._connection.commit()

			self._logger.write('loaded sqlite cache...')
			# Passing False asks for a fresh count, which also stores the value
			# that later cached lookups return.
			self._logger.write(str(self.count_of_files(False)) + ' files...')
			self._logger.write(str(self.count_of_words(False)) + ' words...')

		finally:
			self._connection.release()
예제 #2
0
	def __init__(self, pathRoot, logger=None):
		"""Load the pickled word and file caches stored under pathRoot.

		pathRoot -- directory prefix of the two '.cPickle' cache files.
		logger -- object exposing write(text); when omitted a FakeLogView
		named 'Database Log' is created.

		If either cache cannot be loaded, both are replaced by empty dicts
		and the instance is marked dirty.
		"""

		self._pathRoot = pathRoot
		self._dirty = False

		# Identity test: a logger that overrides __eq__ must not be mistaken for None.
		if logger is None:
			self._logger = FakeLogView('Database Log')
		else:
			self._logger = logger

		self._lock = threading.RLock()

		# 'with' releases the lock even if a logger call raises.
		with self._lock:

			#init databases
			# NOTE(review): unpickling runs code embedded in the file; this is only
			# safe while the cache files come from a trusted location.
			try:
				with open(self._pathRoot + '/database_words' + self.version + '.cPickle', 'rb') as cache:
					self._words = cPickle.load(cache)
					self._logger.write('loaded word cache...' + str(len(self._words)) + ' words.')
			except Exception as e:
				# Best effort: a missing or unreadable cache only triggers a rebuild below.
				self._words = None
				self._logger.write(str(e))
			try:
				with open(self._pathRoot + '/database_files' + self.version + '.cPickle', 'rb') as cache:
					self._files = cPickle.load(cache)
					self._logger.write('loaded file cache...' + str(len(self._files)) + ' files.')
			except Exception as e:
				self._files = None
				self._logger.write(str(e))

			# The two caches refer to each other by file index, so one without the
			# other is discarded as well.
			if self._words is None or self._files is None:
				self._words = dict()
				self._logger.write('created word cache!')
				self._files = dict()
				self._logger.write('created file cache!')
				self._dirty = True
	def __init__(self, pathRoot, logger=None):
		"""Open (creating if necessary) the file and word shelves under pathRoot.

		pathRoot -- directory prefix of the two shelve files.
		logger -- object exposing write(text); when omitted a FakeLogView
		named 'Database Log' is created.

		Raises whatever shelve.open raises; a shelf that was already opened
		is closed again before the error propagates.
		"""

		self._words = None
		self._files = None
		self._pathRoot = pathRoot

		# Identity test: a logger that overrides __eq__ must not be mistaken for None.
		if logger is None:
			self._logger = FakeLogView('Database Log')
		else:
			self._logger = logger

		self._lock = threading.RLock()
		with self._lock:
			self._files = shelve.open(self._pathRoot + '/shelve_files.' + self.version, flag='c')
			try:
				self._words = shelve.open(self._pathRoot + '/shelve_words.' + self.version, flag='c')
			except Exception:
				# Do not leave the first shelf open when the second one fails.
				self._files.close()
				self._files = None
				raise
			self._logger.write('Shelve DB loaded file cache...' + str(len(self._files)) + ' files.')
			self._logger.write('Shelve DB loaded word cache...' + str(len(self._words)) + ' words.')
class DatabaseByShelfing():
	"""Word index kept in two shelve files under a root directory.

	_files maps str(index) to a (path, timestamp) tuple.
	_words maps a word to a dict of {indexOfFile: set of positions}.
	Every public method takes the same re-entrant lock.
	"""
	version = '0.0.3'

	def __init__(self, pathRoot, logger=None):
		"""Open (creating if necessary) both shelves and log their sizes.

		pathRoot -- directory prefix of the two shelve files.
		logger -- object exposing write(text); when omitted a FakeLogView
		named 'Database Log' is created.

		Raises whatever shelve.open raises; a shelf that was already opened
		is closed again before the error propagates.
		"""

		self._words = None
		self._files = None
		self._pathRoot = pathRoot

		# Identity test: a logger that overrides __eq__ must not be mistaken for None.
		if logger is None:
			self._logger = FakeLogView('Database Log')
		else:
			self._logger = logger

		self._lock = threading.RLock()
		with self._lock:
			self._files = shelve.open(self._shelf_path('files'), flag='c')
			try:
				self._words = shelve.open(self._shelf_path('words'), flag='c')
			except Exception:
				# Do not leave the first shelf open when the second one fails.
				self._files.close()
				self._files = None
				raise
			self._logger.write('Shelve DB loaded file cache...' + str(len(self._files)) + ' files.')
			self._logger.write('Shelve DB loaded word cache...' + str(len(self._words)) + ' words.')

	def _shelf_path(self, kind):
		"""Return the shelve file name for kind ('files' or 'words')."""
		return self._pathRoot + '/shelve_' + kind + '.' + self.version

	def file_position_get_for_word(self, word):
		"""Return the (indexOfFile, positions) items stored for word.

		Raises KeyError when the word has never been stored.
		"""
		with self._lock:
			return self._words[word].items()

	def words_get_all(self):
		"""Return a list of every stored word."""
		with self._lock:
			# Materialise inside the lock so the caller never reads the shelf unlocked.
			return list(self._words.keys())

	def file_get(self, index):
		"""Return the (path, timestamp) tuple stored for file index."""
		with self._lock:
			return self._files[str(index)]

	def count_of_words(self):
		"""Return the number of stored words."""
		with self._lock:
			return len(self._words)

	def count_of_files(self):
		"""Return the number of stored files."""
		with self._lock:
			return len(self._files)

	def add_new_file(self, path):
		"""Register path with timestamp 0 unless it is already stored.

		Returns True when the file was added, False when it was known.
		"""
		with self._lock:
			already_known = any(
				stored_path == path for stored_path, _ in self._files.values())
			if not already_known:
				# The next free index is the current number of files.
				self._files[str(len(self._files))] = (path, 0)
			return not already_known

	def update_words_in_file(self, words, indexOfFile, newModifiedTimestamp):
		"""Replace all word positions recorded for one file.

		words -- iterable of (position, word) pairs found in the file.
		indexOfFile -- index of a file that was added with add_new_file.
		newModifiedTimestamp -- value stored as the file's new timestamp.

		Raises KeyError when indexOfFile is not a stored file.
		"""
		with self._lock:

			#delete old references
			# Snapshot the keys: entries are written back while looping.
			for key in list(self._words.keys()):
				positions_by_file = self._words[key]
				if indexOfFile in positions_by_file:
					del positions_by_file[indexOfFile]
					# A shelf only persists a value when it is assigned again.
					self._words[key] = positions_by_file

			#update with new words in file
			for pos, word in words:
				try:
					if word in self._words:
						positions_by_file = self._words[word]
					else:
						positions_by_file = dict()
					positions_by_file.setdefault(indexOfFile, set()).add(pos)
					self._words[word] = positions_by_file
				except Exception:
					print('ERROR: word=' + word)
					raise

			file_key = str(indexOfFile)
			path, _ = self._files[file_key]
			self._files[file_key] = (path, newModifiedTimestamp)

	def clear(self):
		"""Discard all stored data by recreating both shelves empty."""
		with self._lock:
			self._files.close()
			self._words.close()
			# flag='n' always creates a new, empty database.
			self._files = shelve.open(self._shelf_path('files'), flag='n')
			self._words = shelve.open(self._shelf_path('words'), flag='n')

	def flush(self):
		"""Write pending changes of both shelves to disk.

		Errors are logged instead of raised.
		"""

		self._logger.write('Saving db...\n')

		with self._lock:
			try:
				self._files.sync()
				self._words.sync()
			except Exception as e:
				self._logger.write(str(e))

	def close(self):
		"""Close both shelves; the instance must not be used afterwards."""
		# Take the lock like every other method so a close cannot interleave
		# with a running read or update.
		with self._lock:
			try:
				self._files.close()
			finally:
				# Close the word shelf even when closing the file shelf failed.
				self._words.close()
예제 #5
0
class Database():
	"""In-memory word index that is persisted as two pickle files.

	_files maps str(index) to a (path, timestamp) tuple.
	_words maps a word to a dict of {indexOfFile: set of positions}.
	_dirty records whether the in-memory state differs from what flush()
	last wrote.
	"""
	version = '0.0.3'

	def __init__(self, pathRoot, logger=None):
		"""Load the pickled word and file caches stored under pathRoot.

		pathRoot -- directory prefix of the two '.cPickle' cache files.
		logger -- object exposing write(text); when omitted a FakeLogView
		named 'Database Log' is created.

		If either cache cannot be loaded, both are replaced by empty dicts
		and the instance is marked dirty.
		"""

		self._pathRoot = pathRoot
		self._dirty = False

		# Identity test: a logger that overrides __eq__ must not be mistaken for None.
		if logger is None:
			self._logger = FakeLogView('Database Log')
		else:
			self._logger = logger

		self._lock = threading.RLock()

		# 'with' releases the lock even if a logger call raises.
		with self._lock:

			#init databases
			# NOTE(review): unpickling runs code embedded in the file; this is only
			# safe while the cache files come from a trusted location.
			try:
				with open(self._pathRoot + '/database_words' + self.version + '.cPickle', 'rb') as cache:
					self._words = cPickle.load(cache)
					self._logger.write('loaded word cache...' + str(len(self._words)) + ' words.')
			except Exception as e:
				# Best effort: a missing or unreadable cache only triggers a rebuild below.
				self._words = None
				self._logger.write(str(e))
			try:
				with open(self._pathRoot + '/database_files' + self.version + '.cPickle', 'rb') as cache:
					self._files = cPickle.load(cache)
					self._logger.write('loaded file cache...' + str(len(self._files)) + ' files.')
			except Exception as e:
				self._files = None
				self._logger.write(str(e))

			# The two caches refer to each other by file index, so one without the
			# other is discarded as well.
			if self._words is None or self._files is None:
				self._words = dict()
				self._logger.write('created word cache!')
				self._files = dict()
				self._logger.write('created file cache!')
				self._dirty = True

	def file_position_get_for_word(self, word):
		"""Return the (indexOfFile, positions) items stored for word.

		Raises KeyError when the word has never been stored.
		"""
		with self._lock:
			return self._words[word].items()

	def words_get_all(self):
		"""Return the keys of the word cache."""
		with self._lock:
			return self._words.keys()

	def file_get(self, index):
		"""Return the (path, timestamp) tuple stored for file index."""
		with self._lock:
			return self._files[str(index)]

	def count_of_words(self):
		"""Return the number of stored words."""
		with self._lock:
			return len(self._words)

	def count_of_files(self):
		"""Return the number of stored files."""
		with self._lock:
			return len(self._files)

	def add_new_file(self, path):
		"""Register path with timestamp 0 unless it is already stored.

		Returns True when the file was added, False when it was known.
		"""
		with self._lock:
			already_known = any(
				entry[0] == path for entry in self._files.values())
			if not already_known:
				# The next free index is the current number of files.
				self._files[str(len(self._files))] = (path, 0)
				self._dirty = True
			return not already_known

	def update_words_in_file(self, words, indexOfFile, newModifiedTimestamp):
		"""Replace all word positions recorded for one file.

		words -- iterable of (position, word) pairs found in the file.
		indexOfFile -- index of a file that was added with add_new_file.
		newModifiedTimestamp -- value stored as the file's new timestamp.

		Raises KeyError when indexOfFile is not a stored file.
		"""
		with self._lock:

			#delete old references
			for positions_by_file in self._words.values():
				positions_by_file.pop(indexOfFile, None)

			#update with new words in file
			for pos, word in words:
				self._words.setdefault(word, {}).setdefault(indexOfFile, set()).add(pos)

			file_key = str(indexOfFile)
			path, _ = self._files[file_key]
			self._files[file_key] = (path, newModifiedTimestamp)

			# Set while still holding the lock so a concurrent flush() cannot
			# reset the flag between the update and this assignment.
			self._dirty = True

	def clear(self):
		"""Drop every stored word and file."""
		with self._lock:
			self._words = dict()
			self._files = dict()
			# Without this flush() would skip saving and the old pickles would
			# come back on the next load.
			self._dirty = True

	def flush(self):
		"""Pickle both caches to disk when there are unsaved changes.

		Errors are logged instead of raised; the dirty flag is only cleared
		after both files were written.
		"""
		if not self._dirty:
			self._logger.write('Db not dirty...')
			return

		self._logger.write('Saving db...')

		with self._lock:
			try:
				with open(self._pathRoot + '/database_files' + self.version + '.cPickle', 'wb') as cache:
					cPickle.dump(self._files, cache)
					self._logger.write(str(len(self._files)) + ' files pickled...')
				with open(self._pathRoot + '/database_words' + self.version + '.cPickle', 'wb') as cache:
					cPickle.dump(self._words, cache)
					self._logger.write(str(len(self._words)) + ' words pickled...')
				self._dirty = False
			except Exception as e:
				self._logger.write(str(e))

	def close(self):
		"""Persist pending changes; equivalent to flush()."""
		self.flush()
class DatabaseBySQLite():
	"""Word index stored in SQLite through a DatabaseConnection.

	Tables: "files" (id, path, timestamp), "words" (id, word) and
	"positions" (id, id_file, id_word, position). Row counts of "files"
	and "words" are cached on the instance and double as the next free id.
	"""
	version = '0.0.4'

	def __init__(self, pathRoot, logger=None):
		"""Open the connection and make sure the schema exists.

		pathRoot -- handed unchanged to DatabaseConnection together with
		the class version.
		logger -- object exposing write(text); when omitted a FakeLogView
		named 'Database Log' is created.
		"""

		self._connection = DatabaseConnection(pathRoot, self.version)

		# Identity test: a logger that overrides __eq__ must not be mistaken for None.
		if logger is None:
			self._logger = FakeLogView('Database Log')
		else:
			self._logger = logger

		self._connection.acquire()
		try:

			# Every method here relies on curs() handing back the same cursor
			# between execute() and fetch*(), so it is fetched once per method.
			cursor = self._connection.curs()

			cursor.execute('''CREATE TABLE IF NOT EXISTS "files"
				("id" INTEGER PRIMARY KEY NOT NULL , "path" TEXT NOT NULL, "timestamp" INTEGER NOT NULL)''')
			cursor.execute('''CREATE  TABLE  IF NOT EXISTS "words"
				("id" INTEGER PRIMARY KEY NOT NULL , "word" TEXT)''')
			cursor.execute('''CREATE  TABLE  IF NOT EXISTS "positions"
				("id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
				 	"id_file" INTEGER NOT NULL,
					"id_word" INTEGER NOT NULL,
					"position" INTEGER NOT NULL,
					FOREIGN KEY(id_file) REFERENCES files(id),
					FOREIGN KEY(id_word) REFERENCES words(id))''')
			self._connection.commit()

			self._logger.write('loaded sqlite cache...')
			# Passing False runs the COUNT query and primes the cached counters.
			self._logger.write(str(self.count_of_files(False)) + ' files...')
			self._logger.write(str(self.count_of_words(False)) + ' words...')

		finally:
			self._connection.release()

	def file_position_get_for_word(self, word):
		"""Return (path, set of positions) items for every file containing word.

		An unknown word yields no items.
		"""
		self._connection.acquire()
		try:

			cursor = self._connection.curs()
			cursor.execute('''SELECT path, position
				FROM positions
				INNER JOIN words ON positions.id_word = words.id
				INNER JOIN files ON positions.id_file = files.id
				WHERE words.word = ?
				ORDER BY path ASC''', (word,))

			results = dict()
			for path, position in cursor.fetchall():
				results.setdefault(path, set()).add(position)

			return results.items()

		finally:
			self._connection.release()

	def words_get_all(self):
		"""Return a list of all stored words ordered by id."""
		self._connection.acquire()
		try:
			cursor = self._connection.curs()
			cursor.execute('''SELECT word FROM words ORDER BY id ASC''')
			return [row[0] for row in cursor.fetchall()]
		finally:
			self._connection.release()

	def files_get_all(self):
		"""Return a list of (path, timestamp) rows ordered by id."""
		self._connection.acquire()
		try:
			cursor = self._connection.curs()
			cursor.execute('''SELECT path, timestamp FROM files ORDER BY id ASC''')
			return cursor.fetchall()
		finally:
			self._connection.release()

	def count_of_words(self, cached=True):
		"""Return the number of rows in "words".

		cached -- when true return the stored counter; when false query the
		database and refresh the stored counter.
		"""
		if cached:
			return self._cached_count_of_words

		self._connection.acquire()
		try:
			cursor = self._connection.curs()
			cursor.execute("SELECT COUNT(id) FROM words")
			self._cached_count_of_words = cursor.fetchone()[0]
			return self._cached_count_of_words
		finally:
			self._connection.release()

	def count_of_files(self, cached=True):
		"""Return the number of rows in "files".

		cached -- when true return the stored counter; when false query the
		database and refresh the stored counter.
		"""
		if cached:
			return self._cached_count_of_files

		self._connection.acquire()
		try:
			cursor = self._connection.curs()
			cursor.execute("SELECT COUNT(id) FROM files")
			self._cached_count_of_files = cursor.fetchone()[0]
			return self._cached_count_of_files
		finally:
			self._connection.release()

	def add_new_file(self, path):
		"""Insert path with timestamp 0 unless it is already stored.

		Returns True when a row was inserted, False when the path was known.
		Raises Exception when the path is stored more than once.
		"""
		self._connection.acquire()
		try:

			cursor = self._connection.curs()
			cursor.execute('''SELECT COUNT(id) FROM files
				WHERE path = ?''', (path,))
			matches = cursor.fetchone()[0]

			if matches == 1:
				return False
			if matches != 0:
				raise Exception('Found multiple same files!!!')

			# The cached row count is used as the id of the new row.
			new_id = self.count_of_files()
			cursor.execute('''INSERT INTO files
				VALUES(?, ?, 0)''', (new_id, path))
			self._connection.commit()
			self._cached_count_of_files += 1
			return True

		finally:
			self._connection.release()


	def update_words_in_file(self, words, path, newModifiedTimestamp):
		"""Replace all word positions recorded for the file at path.

		words -- iterable of (position, word) pairs found in the file.
		path -- file path; the file is added first when it is unknown.
		newModifiedTimestamp -- value stored as the file's new timestamp.
		"""
		self._connection.acquire()
		try:

			cursor = self._connection.curs()

			#find file id, or add it, if it doesn't exist
			cursor.execute('''SELECT id FROM files WHERE path = ?''', (path, ))
			row = cursor.fetchone()
			if row is None:
				self.add_new_file(path)
				cursor.execute('''SELECT MAX(id) FROM files''')
				id_file = cursor.fetchone()[0]
			else:
				id_file = row[0]

			#delete old references
			cursor.execute('''DELETE FROM positions
				WHERE id_file = ?''', (id_file,))

			params = []

			#update with new words in file
			for pos, word in words:
				cursor.execute('''SELECT id FROM words WHERE word = ?''', (word, ))
				row = cursor.fetchone()
				if row is None:
					# The cached row count is used as the id of the new word.
					id_word = self.count_of_words()
					cursor.execute('''INSERT INTO words VALUES(?, ?)''', (id_word, word))
					self._cached_count_of_words += 1
				else:
					id_word = row[0]

				params.append((id_file, id_word, pos))

			cursor.executemany('''INSERT INTO positions(id_file, id_word, position) VALUES(?, ?, ?)''', params)
			cursor.execute('''UPDATE files SET timestamp = ? WHERE id = ?''', (newModifiedTimestamp, id_file))

			self._connection.commit()

		finally:
			self._connection.release()

	def clear(self):
		"""Delete every row from all three tables and reset the cached counts.

		Raises AssertionError when rows remain after the delete.
		"""
		self._connection.acquire()
		try:

			self._connection.curs().executescript(
				'''DELETE FROM positions;
				DELETE FROM files;
				DELETE FROM words;''')
			self._connection.commit()

			# Refresh the counters outside of any assert statement: asserts are
			# removed under "python -O", which would leave the cached counts stale.
			remaining_files = self.count_of_files(False)
			remaining_words = self.count_of_words(False)
			if remaining_files != 0:
				raise AssertionError('Files could not be deleted!')
			if remaining_words != 0:
				raise AssertionError('Words could not be deleted!')

		finally:
			self._connection.release()

	def flush(self):
		"""Do nothing; every change is committed by the method that makes it."""
		pass

	def close(self):
		"""Close the underlying connection."""
		self._connection.close()