def test_update_value(self):
    """Updating a doc rewrites it in place; the previous value is unfindable."""
    self.data_service.update(CollectionMetaData('col'), [2],
                             [{'id': 2, 'first_name': 'Joooooohn', 'last_name': 'Smith'}])
    self.assertEqual(self.data_service.file_len('data-test/col/data1.bin'), 3)
    by_new_name = FilterTool({'$filter': {'first_name': 'Joooooohn'}})
    found = self.data_service.find_one_in_file('data-test/col/data1.bin', by_new_name)
    self.assertEqual(found['id'], 2)
    self.assertEqual(found['first_name'], 'Joooooohn')
    # revert to the original value; the temporary name must be gone
    self.data_service.update(CollectionMetaData('col'), [2],
                             [{'id': 2, 'first_name': 'John', 'last_name': 'Smith'}])
    self.assertIsNone(self.data_service.find_one_in_file('data-test/col/data1.bin', by_new_name))
def test_append_doc_to_new_file(self):
    """When the current data file is at capacity, append rolls into a new file."""
    DatabaseContext.MAX_DOC_PER_FILE = 3
    meta = CollectionMetaData('col')
    self.data_service.append(meta, [{'id': 123}])
    self.assertEqual(self.data_service.file_len('data-test/col/data3.bin'), 1)
    # restore the original file layout for the other tests
    meta.remove_last_data_file()
def create(self, collection):
    """Create the collection's data folder and its mandatory 'id' index.

    Returns {'status': 'already existing'} when the folder is already
    present; otherwise returns the status dict from build_index.
    """
    # build the path and the metadata object once instead of twice
    path = DatabaseContext.DATA_FOLDER + collection
    if os.path.exists(path):
        return {'status': 'already existing'}
    os.makedirs(path)
    # instantiating the metadata initializes the collection's meta data
    col_meta_data = CollectionMetaData(collection)
    # all the collections must have the id index
    return self.indexes_service.build_index(col_meta_data, [], 'id')
def test_add_remove_index(self):
    """add_or_update_index_count and remove_index_count both report 'done'."""
    meta_data = CollectionMetaData('col')
    result = meta_data.add_or_update_index_count('id', 10)
    self.assertEqual(result['status'], 'done')
    self.assertEqual(meta_data.indexes['id'], 10)
    # BUG FIX: the original re-asserted the stale result of the *add* call;
    # capture the remove call's own return value, mirroring the add pattern.
    # NOTE(review): assumes remove_index_count returns a status dict like the
    # add call does — confirm against the CollectionMetaData implementation.
    result = meta_data.remove_index_count('id')
    self.assertEqual(result['status'], 'done')
    self.assertNotIn('id', meta_data.indexes)
def test_bulk_insert_and_update(self):
    """A mixed upsert inserts the unknown ids and rewrites the known ones."""
    docs = [
        {'id': 21},
        {'id': 2, 'first_name': 'Jack', 'last_name': 'Smith'},
        {'id': 22},
        {'id': 5, 'first_name': 'Emmmmmmmmmmmmmet', 'last_name': 'Brown'},
        {'id': 23},
    ]
    meta = CollectionMetaData('col')
    count_before = self.collections_service.count(meta)
    self.crud_service.upsert(meta, docs)
    # three new ids (21, 22, 23) were inserted; ids 2 and 5 were updates
    self.assertEqual(self.collections_service.count(meta), count_before + 3)
    self.assertEqual(self.query_manager.get_one(meta.collection, 2), docs[1])
    self.assertEqual(self.query_manager.get_one(meta.collection, 5), docs[3])
    # the inserted docs are indexed on consecutive lines after the old tail
    for offset, doc_id in enumerate((21, 22, 23)):
        self.assertEqual(self.indexes_service.get_lines(meta, doc_id), [count_before + offset])
def test_create_index(self):
    """Building an id index over all current docs reports 'done'."""
    meta = CollectionMetaData('col')
    current_docs = self.data_service.find_all(meta, None)
    outcome = self.indexes_service.build_index(meta, current_docs, 'id')
    self.assertEqual(outcome['status'], 'done')
def test_update_indexes(self):
    """update_indexes re-keys an index entry without moving the doc's line."""
    meta = CollectionMetaData('col')
    all_docs = self.data_service.find_all(meta, None)
    self.indexes_service.build_index(meta, all_docs, 'id')
    old_lines = self.indexes_service.find_all(meta, 'id', FilterTool({'$filter': {'id': 2}}))
    # this updates the index information, not the document itself
    self.indexes_service.update_indexes(meta, [{'id': 2}], [{'id': 20}])
    new_lines = self.indexes_service.find_all(meta, 'id', FilterTool({'$filter': {'id': 20}}))
    self.assertEqual(len(new_lines), 1)
    self.assertListEqual(old_lines, new_lines)
def test_clean_deleted_items(self):
    """The cleaning thread purges docs that were blanked by an update.

    Before cleaning: the blank line still counts and the id index entry
    survives. After running the cleaner: both are gone.
    """
    meta = CollectionMetaData('col')
    count_before = len(self.data_service.find_all(meta, None))
    # blanking the doc marks it deleted but leaves its line in the file
    self.data_service.update(meta, [2], [{}])
    CleaningStack.get_instance().push(meta, {}, 1)
    docs = self.data_service.find_all(meta, None)
    id2_lines = self.indexes_service.find_all(meta, 'id', FilterTool({'$filter': {'id': 2}}))
    self.assertEqual(len(CleaningStack.get_instance().stack), 1)
    self.assertEqual(count_before, len(docs))
    self.assertEqual(len(self.search_service.find_in_docs(docs, SearchContext({'$filter': {'id': 2}}))), 0)
    self.assertEqual(len(id2_lines), 1)
    # run the cleaner synchronously and re-check everything
    CleaningThread().run()
    docs = self.data_service.find_all(meta, None)
    id2_lines = self.indexes_service.find_all(meta, 'id', FilterTool({'$filter': {'id': 2}}))
    self.assertEqual(len(CleaningStack.get_instance().stack), 0)
    self.assertEqual(count_before - 1, len(docs))
    self.assertEqual(len(self.search_service.find_in_docs(docs, SearchContext({'$filter': {'id': 2}}))), 0)
    self.assertEqual(len(id2_lines), 0)
def build_threads_need(self, collection, action):
    """Return the thread ids needed to execute *action* on *collection*.

    Searches fan out over every data file (one id per file counter);
    write actions run on a single thread. Unknown actions fall through
    and return None, exactly as before.
    """
    if action == 'search':
        return list(range(1, CollectionMetaData(collection).counter + 1))
    if action in ('upsert', 'patch', 'delete'):
        return [1]
def test_remove_doc_in_file(self):
    """Updating a doc to {} blanks its line; it can no longer be found."""
    outcome = self.data_service.update(CollectionMetaData('col'), [6], [{}])[0]
    self.assertEqual(outcome, {'line': 5, 'doc': {}})
    by_id = FilterTool({'$filter': {'id': 6}})
    self.assertIsNone(self.data_service.find_one_in_file('data-test/col/data2.bin', by_id))
def run(self):
    """Execute the queued query item and publish its results (or error).

    Dispatches on the item's action, runs the matching service call, and
    pushes the results to the QueryStack under this query/thread id. Any
    failure — including an unknown action, which leaves ``results``
    unbound — is reported via push_error instead of propagating.
    """
    try:
        # IDIOM FIX: compare to None with 'is', not '=='
        if self.item is None:
            return
        # hoist the repeated lookups out of the dispatch chain
        action = self.item['action']
        col_meta_data = CollectionMetaData(self.item['collection'])
        if action == 'search':
            results = self.search_service.search_by_thread(
                col_meta_data, SearchContext(self.item['search_query']), self.thread_id)
        elif action == 'upsert':
            results = self.crud_service.upsert(col_meta_data, self.item['docs'])
        elif action == 'patch':
            results = self.crud_service.patch(
                col_meta_data, self.item['previous_doc'], self.item['doc'])
        elif action == 'delete':
            results = self.crud_service.delete(col_meta_data, self.item['search_query'])
        QueryStack.get_instance().push_results(results, self.query_id, self.thread_id)
    except Exception as e:
        print(f'Query thread failed with {e}')
        QueryStack.get_instance().push_error(e, self.query_id)
def get_status(self, collection):
    """Return the collection's doc count and its per-field index counts.

    Shape: {'count': int, 'indexes': [{'field': str, 'count': int}, ...]}.
    """
    col_meta_data = CollectionMetaData(collection)
    # IDIOM FIX: iterate items() instead of keys() + per-key indexing
    indexes = [{'field': field, 'count': count}
               for field, count in col_meta_data.indexes.items()]
    return {'count': self.count(col_meta_data), 'indexes': indexes}
def init_data_folder(col_name, col_size):
    """Create the test data folder tree and return the collection metadata.

    Also pins MAX_DOC_PER_FILE to *col_size* so tests control file rollover.
    """
    DatabaseContext.MAX_DOC_PER_FILE = col_size
    DatabaseContext.DATA_FOLDER = 'data-test/'
    # IDIOM FIX: replace the two '== False' existence checks with one
    # idempotent makedirs — exist_ok creates intermediates and tolerates reruns
    os.makedirs(DatabaseContext.DATA_FOLDER + col_name, exist_ok=True)
    return CollectionMetaData(col_name)
def test_append_bulk(self):
    """A bulk append fills the current data file then rolls into the next."""
    DatabaseContext.MAX_DOC_PER_FILE = 3
    meta = CollectionMetaData('col')
    new_docs = [{'id': doc_id} for doc_id in (201, 202, 203, 204, 205)]
    self.data_service.append(meta, new_docs)
    self.assertEqual(self.data_service.file_len('data-test/col/data3.bin'), 3)
    self.assertEqual(self.data_service.file_len('data-test/col/data4.bin'), 2)
    # restore the original file layout for the other tests
    meta.remove_last_data_file()
    meta.remove_last_data_file()
def test_find_doc_multiple_sort(self):
    """Sorting by first_name ASC then last_name DESC orders the tie correctly."""
    ctx = SearchContext({'$sort': {'first_name': 'ASC', 'last_name': 'DESC'}})
    results = self.search_service.search_by_thread(CollectionMetaData('col'), ctx, None)
    self.assertEqual(len(results), 6)
    self.assertEqual([r['first_name'] for r in results],
                     ['Biff', 'Emmett', 'John', 'John', 'Marty', 'Sergio'])
    # within the 'John' tie, last names come back descending
    self.assertEqual(results[2]['last_name'], 'Smith')
    self.assertEqual(results[3]['last_name'], 'Doe')
def test_bulk_delete(self):
    """Deleting by filter removes the docs once the cleaning stack drains."""
    query = {'$filter': {'id': [3, 4]}}
    meta = CollectionMetaData('col')
    count_before = self.collections_service.count(meta)
    self.crud_service.delete(meta, query)
    # deletion is finalized asynchronously; wait for the cleaner to drain
    while CleaningStack.get_instance().contains_data():
        time.sleep(DatabaseContext.THREADS_CYCLE)
    self.assertEqual(self.collections_service.count(meta), count_before - 2)
    self.assertEqual(len(self.query_manager.search(meta.collection, query)), 0)
def get_status(self):
    """Summarize every collection plus the pending background operations."""
    collections = []
    for name in os.listdir(DatabaseContext.DATA_FOLDER):
        meta = CollectionMetaData(name)
        collections.append({
            'collection': name,
            'count': self.collections_service.count(meta),
            'size (bytes)': self.collection_size(meta),
        })
    return {
        'collections': collections,
        'cleaning_operations': CleaningStack.get_instance().get_details(),
        'query_operations': QueryStack.get_instance().get_details(),
        'replication_operations': ReplicationStack.get_instance().get_details(),
    }
def test_bulk_insert_new_docs(self):
    """Five brand-new docs are appended and indexed on consecutive lines."""
    docs = [{'id': doc_id} for doc_id in range(11, 16)]
    meta = CollectionMetaData('col')
    count_before = self.collections_service.count(meta)
    self.crud_service.upsert(meta, docs)
    self.assertEqual(self.collections_service.count(meta), count_before + 5)
    for offset, doc_id in enumerate(range(11, 16)):
        self.assertEqual(self.indexes_service.get_lines(meta, doc_id), [count_before + offset])
def test_bulk_update_docs(self):
    """Upserting only existing ids rewrites docs without growing the collection."""
    docs = [
        {'id': 2, 'first_name': 'Joe', 'last_name': 'Smith'},
        {'id': 5, 'first_name': 'Emmetttttttt', 'last_name': 'Brown'},
    ]
    meta = CollectionMetaData('col')
    count_before = self.collections_service.count(meta)
    self.crud_service.upsert(meta, docs)
    self.assertEqual(self.collections_service.count(meta), count_before)
    self.assertEqual(self.query_manager.get_one(meta.collection, 2), docs[0])
    self.assertEqual(self.query_manager.get_one(meta.collection, 5), docs[1])
def test_search_over_500000_docs_with_index_separated_threads(self):
    """An indexed id lookup in a huge collection works on a worker thread."""
    ctx = SearchContext({'$filter': {'id': 449994}})
    matches = self.search_service.search_by_thread(
        CollectionMetaData('big-col-with-index'), ctx, 5)
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0]['id'], 449994)
def test_search_over_500000_docs(self):
    """An unindexed id lookup still finds the single match in a huge collection."""
    ctx = SearchContext({'$filter': {'id': 449994}})
    matches = self.search_service.search_by_thread(CollectionMetaData('big-col'), ctx, None)
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0]['id'], 449994)
def test_find_doc_with_skip_size_sort(self):
    """$skip/$size/$sort combine: a page of 2 after skipping 1, ids descending."""
    ctx = SearchContext({
        '$filter': {'first_name': {'$exists': True}},
        '$skip': 1,
        '$size': 2,
        '$sort': {'id': 'DESC'},
    })
    matches = self.search_service.search_by_thread(CollectionMetaData('col'), ctx, None)
    self.assertEqual([doc['id'] for doc in matches], [5, 4])
def test_find_doc_in_second_file(self):
    """A doc stored in the second data file is still reachable by filter."""
    ctx = SearchContext({'$filter': {'id': 6}})
    matches = self.search_service.search_by_thread(CollectionMetaData('col'), ctx, None)
    self.assertEqual(len(matches), 1)
def test_find_by_line_second_file_actual_thread(self):
    """find_by_line with thread id 2 resolves line 4 to the doc with id 5."""
    found = self.data_service.find_by_line(CollectionMetaData('col'), [4], 2)
    self.assertEqual(len(found), 1)
    self.assertEqual(found[0]['id'], 5)
def test_find_by_multiple_lines_but_found_second_one(self):
    """Of the two requested lines, only the second matches on thread 2."""
    found = self.data_service.find_by_line(CollectionMetaData('col'), [1, 4], 2)
    self.assertEqual(len(found), 1)
    self.assertEqual(found[0]['id'], 5)
def test_append_doc_in_file(self):
    """With a large file cap, an appended doc lands in the current file."""
    DatabaseContext.MAX_DOC_PER_FILE = 10000
    self.data_service.append(CollectionMetaData('col'), [{'id': 123}])
    self.assertEqual(self.data_service.file_len('data-test/col/data2.bin'), 4)
def create_index(collection, field):
    """Build an index on *field* from every doc currently in *collection*."""
    meta = CollectionMetaData(collection)
    all_docs = data_service.find_all(meta, None)
    return indexes_service.build_index(meta, all_docs, field)
def test_find_by_line_second_file_previous_thread(self):
    """Thread 1 returns nothing for a line that lives in the second data file."""
    found = self.data_service.find_by_line(CollectionMetaData('col'), [4], 1)
    self.assertEqual(len(found), 0)
def test_remove_index(self):
    """Removing an existing index reports 'done'."""
    outcome = self.indexes_service.remove_index(CollectionMetaData('col'), 'id')
    self.assertEqual(outcome['status'], 'done')
def delete_index(collection, field):
    """Drop the index on *field* for *collection* and return the status."""
    meta = CollectionMetaData(collection)
    return indexes_service.remove_index(meta, field)