Example #1
	def _update_page(self, page, cache):

		'''
		Write the temp_cache produced by _read_page back to the page's JSON file
		'''

		page_content = JSONStorage(find_nth_file(self.name, os.getcwd()+'/'+self._db.db_name, page))
		page_content.write(cache)
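
Example #1 calls a find_nth_file helper that is not included in these snippets. Judging from how update_storage names new files below (the collection name followed by an underscore, the file number, and a .json suffix, inside the database directory), a minimal, hypothetical sketch of it might be:

import os

def find_nth_file(name, db_path, n):
	# Hypothetical sketch: return the path of the n-th JSON file of a
	# collection, matching the '<name>_<n>.json' naming pattern that
	# update_storage() uses when it rolls over to a new file.
	return os.path.join(db_path, '%s_%d.json' % (name, n))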
Example #2
	def __init__(self, name, database):


		# The maximum number of entries per JSON file
		self.FILE_SIZE_LIMIT = 100

		# The write-back interval in seconds (used as the timer period)
		self.WRITE_IN_FREQ = 0.1

		# The maximum number of entries in the query cache
		self.QUERY_CACHE_SIZE_LIMIT = 30

		# The number of JSON files and the path for the current JSON file
		self.file_num, self.current_file = find_current_file(name, os.getcwd()+'/'+database.db_name)

		self.storage = JSONStorage(self.current_file)


		self.name = name
		self._db = database

		# Cache for the whole collection
		self._cache = {}

		# Cache for the latest file, the one currently being written to
		self._current_cache = {}

		# Cache for the most recently queried results (LRU)
		# self._query_cache = LRUCache(self.QUERY_CACHE_SIZE_LIMIT)


		# Each entry has a unique id as the primary key

		existing_ids = self._read().keys()

		if existing_ids:
			self._last_id = max(existing_ids)
		else:
			self._last_id = 0

		self._load_query_cache()
		self.update_storage()
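
The constructor also leans on two helpers that are not shown here: find_current_file and JSONStorage. A minimal, hypothetical JSONStorage consistent with the calls made in these examples (read(), read(is_ordered_dict=True) and write(dict)) could look like the sketch below; it assumes the stored values serialize as plain JSON.

import json
import os
from collections import OrderedDict

class JSONStorage(object):
	# Hypothetical sketch of the storage helper assumed by Collection:
	# one instance wraps one JSON file and exposes read()/write().
	def __init__(self, path):
		self.path = path
		if not os.path.exists(path):
			# Start every new file as an empty JSON object.
			with open(path, 'w') as f:
				json.dump({}, f)

	def read(self, is_ordered_dict=False):
		with open(self.path, 'r') as f:
			if is_ordered_dict:
				# Preserve key order, e.g. for the LRU query cache file.
				return json.load(f, object_pairs_hook=OrderedDict)
			return json.load(f)

	def write(self, data):
		with open(self.path, 'w') as f:
			json.dump(data, f)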
Example #3
	def update_storage(self):
		if len(self._current_cache) >= self.FILE_SIZE_LIMIT:
			# We got here because the current file hit its size limit, so:
			#   1. write the current cache to the file
			#   2. create the next file
			#   3. point the current storage at the new file
			self.storage.write(dict(self._current_cache))
			# print time.ctime(), 'size limit'
			self._current_cache = {}
			self.file_num += 1
			self.storage = JSONStorage(os.getcwd()+'/'+ self._db.db_name + '/' + self.name + '_' + str(self.file_num) + '.json')
		else:
			# Otherwise the timer simply expired, so do the regular periodic write-back
			# print time.ctime(), 'regular write-in', len(self._current_cache)

			self.storage.write(dict(self._current_cache))
			self.query_storage.write(self._query_cache.items)
			# print self._query_cache.items

		# Set the timer again. Make it periodic.
		self.t = threading.Timer(self.WRITE_IN_FREQ, self.update_storage)
		self.t.start()
Example #4
	def _load_query_cache(self):

		self.query_storage = JSONStorage(os.getcwd()+'/'+ self._db.db_name + '/' + self.name + '_query.json')
		self._query_cache = LRUCache(self.QUERY_CACHE_SIZE_LIMIT, self.query_storage.read(is_ordered_dict=True))
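
Example #4 relies on an LRUCache class that is not part of these snippets. From the way it is used elsewhere (get() returning -1 on a miss, set(), and an items mapping that update_storage persists to disk), a minimal, hypothetical sketch might be:

from collections import OrderedDict

class LRUCache(object):
	# Hypothetical sketch of the query cache assumed by Collection: an
	# ordered mapping capped at `size` entries, evicting the least
	# recently used entry on overflow.
	def __init__(self, size, items=None):
		self.size = size
		self.items = OrderedDict(items or {})

	def get(self, key):
		if key not in self.items:
			# search() treats -1 as a cache miss.
			return -1
		# Move the entry to the most-recently-used end.
		self.items[key] = self.items.pop(key)
		return self.items[key]

	def set(self, key, value):
		self.items.pop(key, None)
		self.items[key] = value
		if len(self.items) > self.size:
			# Drop the least recently used entry.
			self.items.popitem(last=False)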
Example #5
class Collection(object):
	def __init__(self, name, database):


		# The maximum number of entries per JSON file
		self.FILE_SIZE_LIMIT = 100

		# The write-back interval in seconds (used as the timer period)
		self.WRITE_IN_FREQ = 0.1

		# The maximum number of entries in the query cache
		self.QUERY_CACHE_SIZE_LIMIT = 30

		# The number of JSON files and the path for the current JSON file
		self.file_num, self.current_file = find_current_file(name, os.getcwd()+'/'+database.db_name)

		self.storage = JSONStorage(self.current_file)


		self.name = name
		self._db = database

		# Cache for the whole collection
		self._cache = {}

		# Cache for the latest file, the one currently being written to
		self._current_cache = {}

		# Cache for the most recently queried results (LRU)
		# self._query_cache = LRUCache(self.QUERY_CACHE_SIZE_LIMIT)


		# Each entry has a unique id as the primary key

		existing_ids = self._read().keys()

		if existing_ids:
			self._last_id = max(existing_ids)
		else:
			self._last_id = 0

		self._load_query_cache()
		self.update_storage()

	def setting(self, file_size=None, query_cache_size=None, write_freq=None):
		if file_size is not None:
			self.FILE_SIZE_LIMIT = file_size

		if query_cache_size is not None:
			self.QUERY_CACHE_SIZE_LIMIT = query_cache_size

		if write_freq is not None:
			self.WRITE_IN_FREQ = write_freq

	def _load_query_cache(self):

		self.query_storage = JSONStorage(os.getcwd()+'/'+ self._db.db_name + '/' + self.name + '_query.json')
		self._query_cache = LRUCache(self.QUERY_CACHE_SIZE_LIMIT, self.query_storage.read(is_ordered_dict=True))

		# print self._query_cache.items.keys()


	def update_storage(self):
		if len(self._current_cache) >= self.FILE_SIZE_LIMIT:
			# We got here because the current file hit its size limit, so:
			#   1. write the current cache to the file
			#   2. create the next file
			#   3. point the current storage at the new file
			self.storage.write(dict(self._current_cache))
			# print time.ctime(), 'size limit'
			self._current_cache = {}
			self.file_num += 1
			self.storage = JSONStorage(os.getcwd()+'/'+ self._db.db_name + '/' + self.name + '_' + str(self.file_num) + '.json')
		else:
			# Otherwise the timer simply expired, so do the regular periodic write-back
			# print time.ctime(), 'regular write-in', len(self._current_cache)

			self.storage.write(dict(self._current_cache))
			self.query_storage.write(self._query_cache.items)
			# print self._query_cache.items

		# Set the timer again. Make it periodic.
		self.t = threading.Timer(self.WRITE_IN_FREQ, self.update_storage)
		self.t.start()

	def process_element(self, func, cond=None, eid=None):
		data = self._read()

		if eid is not None:
			for i in eid:
				func(data, i)

		else:
			for eid in list(data):
				if cond(data[eid]):
					func(data, eid)

		self._write(data)

	def _get_next_id(self):
		current = self._last_id + 1
		self._last_id = current

		return current

	def _read(self):

		if not self._cache:
			# When the collection is first read, read it all at once into the cache.
			self._read_all()
		return self._cache


	def _read_all(self):
		for i in range(1, self.file_num+1):
			raw = JSONStorage(find_nth_file(self.name, os.getcwd()+'/'+self._db.db_name, i)).read()
			for key in list(raw):
				eid = int(key)
				self._cache[eid] = Element(raw[key], eid)
				if i == self.file_num:
					self._current_cache[eid] = Element(raw[key], eid)

	def _write(self, values):

		self.storage.write(values)

	def __len__(self):
		return len(self._read())

	def all(self):
		return list(self._read().values())

	def insert(self, value):

		eid = self._get_next_id()
		self._cache[eid] = value
		self._current_cache[eid] = value


		'''
		If the file in use reaches the size limit:
		1. cancel the timer
		2. call update_storage immediately
		Otherwise, just wait until the timer expires.
		'''
		if len(self._current_cache) >= self.FILE_SIZE_LIMIT:
			self.t.cancel()
			self.update_storage()

	def insert_multiple(self, values):
		for value in values:
			self.insert(value)

	def update(self, fields, cond=None, eids=None):

		'''
		Update can be performed by specifying eids, cond, or both.
		If neither is given, every element in the collection is updated.
		'''

		if cond is None and eids is None:
			eids = self._cache.keys()

		if cond is not None:
			eids = self._select_to_be_updated_eid(cond, eids)

		pages_eid = self._group_eids(eids)

		self._update_cache(eids, pages_eid, fields)
		self._update_disk(pages_eid, fields)

	def _update_disk(self, pages_eid, fields, remove=False):
		# do the update for the files
		temp_caches = {}

		for page in pages_eid:
			temp_caches[page] = self._read_page(page)
			temp_caches[page] = self._write_in_page(fields, pages_eid[page], temp_caches[page], remove)

		for page in temp_caches:
			self._update_page(page, temp_caches[page])

	def _group_eids(self, eids):
		'''
		Group eids by the page (JSON file) each one belongs to.
		'''
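		# For example, with the default FILE_SIZE_LIMIT of 100, eids
		# [1, 100, 101, 250] map to pages {1: [1, 100], 2: [101], 3: [250]},
		# since page = ceil(eid / FILE_SIZE_LIMIT).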

		pages_eid = {}
		for i in eids:
			page = int(math.ceil(float(i)/float(self.FILE_SIZE_LIMIT)))
			if page not in pages_eid:
				pages_eid[page] = [i]
			else:
				pages_eid[page].append(i)

		return pages_eid

	def _select_to_be_updated_eid(self, cond, eids):

		# Given a condition, return a list of eids that satisfy it
		filtered_ids = []

		if eids is None:
			# if eids are not specified, iterate over all possible eids
			for id in self._cache:
				if cond(self._cache[id]):
					filtered_ids.append(id)
		else:
			# check whether each given eid satisfies the cond
			for id in eids:
				if cond(self._cache[id]):
					filtered_ids.append(id)
		return filtered_ids

	def _update_cache(self, eids, pages_eid, fields=None, remove=False):
		# update the cache first before write-in to the file

		for i in eids:
			if remove:
				self._cache.pop(i, None)
			else:
				self._cache[i].update(fields)
		for page in pages_eid:
			if page == self.file_num:
				for id in pages_eid[page]:
					if remove:
						self._current_cache.pop(id, None)
					else:
						self._current_cache[id].update(fields)


	def _read_page(self, page):

		'''
		Used by update():
		read one affected page and return its contents as a temp_cache
		'''

		page_content = JSONStorage(find_nth_file(self.name, os.getcwd()+'/'+self._db.db_name, page)).read()
		temp_cache = {}
		for key in list(page_content):
			id = int(key)
			temp_cache[id] = Element(page_content[key], id)
		return temp_cache

	def _write_in_page(self, fields, eids, temp_cache, remove=False):
		# apply the updated fields (or remove entries) in the temp_cache
		for eid in eids:
			if remove:
				temp_cache.pop(eid, None)
			else:
				try:
					temp_cache[eid].update(fields)
				except KeyError:
					pass
		return temp_cache

	def _update_page(self, page, cache):

		'''
		Write the temp_cache produced by _read_page back to the page's JSON file
		'''

		page_content = JSONStorage(find_nth_file(self.name, os.getcwd()+'/'+self._db.db_name, page))
		page_content.write(cache)

	def remove(self, cond=None, eids=None):

		if cond is None and eids is None:
			# this means removing all elements
			eids = self._cache.keys()

		if cond is not None:
			eids = self._select_to_be_updated_eid(cond, eids)

		pages_eid = self._group_eids(eids)

		self._update_cache(eids, pages_eid, fields=None, remove=True)

		self._update_disk(pages_eid, fields=None, remove=True)

	def search(self, cond, keys=None):

		# The query cache returns -1 on a miss.
		elements = self._query_cache.get(str(cond))

		if elements == -1:
			elements = [e for e in self.all() if cond(e)]
			self._query_cache.set(str(cond), elements)

		if keys is not None:
			# Project every element down to the requested keys, whether the
			# result came from the cache or from a fresh scan.
			return [{key: e[key] for key in keys} for e in elements]
		return elements

	def clear(self):
		self.remove()

	def get(self, cond=None, eid=None):

		if eid is not None:
			read_all = self._read()
			return read_all[eid]

		elements = self.search(cond)

		if elements == []:
			return {}
		return elements[0]

	def close(self):
		# When closing the database, flush whatever is still cached and stop the periodic write-back.
		self.t.cancel()
		self.update_storage()

		self._cache.clear()
		self._current_cache.clear()

		# update_storage() restarted the timer, so cancel it once more.
		self.t.cancel()
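
For reference, here is a short, hypothetical usage sketch of the Collection class above. A Database object exposing a db_name attribute (the directory that holds the JSON files) is assumed, since the constructor only relies on database.db_name; the names my_db and users are illustrative.

db = Database('my_db')
users = Collection('users', db)

users.insert({'name': 'Ada', 'age': 36})
users.insert({'name': 'Grace', 'age': 45})

# Conditions are plain callables applied to each element.
adults = users.search(lambda e: e['age'] >= 18)
users.update({'age': 37}, cond=lambda e: e['name'] == 'Ada')

# Flush the caches and stop the periodic write-back timer.
users.close()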