def remove(self, txn, entity_id):
    entity_chunk_sub_key = self._graph._get(txn, build_key(self._prefix, entity_id))
    if entity_chunk_sub_key != UNDEFINED:
        # Remove entity_id from its chunk
        chunk_key = build_key(self._prefix, entity_chunk_sub_key)
        self._graph._remove_from_lst(txn, chunk_key, entity_id)
        # Finally remove the entity_id -> chunk mapping
        # (add() stores this mapping under build_key(self._prefix, entity_id),
        # so that is the key to remove -- not entity_chunk_sub_key itself,
        # which is only the un-prefixed value)
        self._graph._remove(txn, build_key(self._prefix, entity_id))

def bulk_add(self, txn, id_iterator):

    def save_lst(txn, chunk_idx, value, lst):
        lst_key = build_key(self._prefix, chunk_idx, value)
        self._graph._set_lst(txn, lst_key, lst)

    kind = self._kind
    values_to_id = defaultdict(list)
    # Get data for every entity_id, in chunks of CHUNK_SIZE
    filter_length = self._filter_length
    filter_funcs = self._filter_funcs
    for entity_ids in zip_longest(*([id_iterator] * CHUNK_SIZE), fillvalue=FORBIDDEN_KEY):
        entity_datas = self._graph._bulk_get(txn, [
            build_key(kind, entity_id, DATA_SUFFIX)
            for entity_id in entity_ids
            if entity_id != FORBIDDEN_KEY
        ])
        for entity_key, data in list(entity_datas.items()):
            if filter_length and not entity_match(filter_funcs, data):
                continue
            entity_id = int(
                entity_key.replace(kind + '/', '').replace('/' + DATA_SUFFIX, '')
            )  # not very clean :(
            lst = [data.get(field, None) for field in self._fields]
            # Complete values_to_id (this is what makes the reverse
            # value -> entity_id lookup possible)
            values_to_id[normalize_value(lst)].append(entity_id)
    # This dict will map each entity id index key to its value chunk id
    entity_dict = {}
    for value, entity_ids in values_to_id.items():
        lst = []
        value_count_key = build_key(self._prefix, COUNT_SUFFIX, value)
        value_count = self._graph._get(txn, value_count_key)
        # Backends may return the counter as a string, hence the int() cast
        # (add() and estimate() do the same)
        value_count = 0 if value_count == UNDEFINED else int(value_count)
        chunk_idx = value_count // CHUNK_SIZE
        for entity_id in entity_ids:
            value_count += 1
            # Keep track of the relation between the entity and the chunk
            # that contains it
            entity_dict[build_key(self._prefix, entity_id)] = build_key(chunk_idx, value)
            lst.append(entity_id)
            if value_count % CHUNK_SIZE == 0:
                save_lst(txn, chunk_idx, value, lst)
                lst = []
                chunk_idx += 1
        if lst:
            save_lst(txn, chunk_idx, value, lst)
        self._graph._set(txn, value_count_key, value_count)
    self._graph._bulk_set(txn, entity_dict)

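# A minimal standalone sketch (not from the codebase) of the fixed-size
# chunking idiom used in bulk_add above: passing CHUNK_SIZE references to the
# *same* iterator to zip_longest yields tuples of CHUNK_SIZE consecutive
# items, padding the last tuple with a sentinel, just as bulk_add pads with
# FORBIDDEN_KEY. The names demo_chunks and _SENTINEL are illustrative.
from itertools import zip_longest

_SENTINEL = object()

def demo_chunks(iterable, chunk_size):
    iterator = iter(iterable)
    for chunk in zip_longest(*([iterator] * chunk_size), fillvalue=_SENTINEL):
        yield [item for item in chunk if item is not _SENTINEL]

# list(demo_chunks(range(7), 3)) == [[0, 1, 2], [3, 4, 5], [6]]
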
def ids(self, txn, filters):
    resulting_filters = self._compatible_filters(filters)
    if resulting_filters is False:
        raise GrapheekIncompetentIndexException("This index shouldn't have been used")
    for values in product(*resulting_filters):
        value = normalize_value(values)
        value_count_key = build_key(self._prefix, COUNT_SUFFIX, value)
        value_count_current = self._graph._get(txn, value_count_key)
        if value_count_current != UNDEFINED:
            for chunk_idx in range(0, 1 + int(value_count_current) // CHUNK_SIZE):
                entity_ids_key = build_key(self._prefix, chunk_idx, value)
                entity_ids = self._graph._get_lst(txn, entity_ids_key)
                if entity_ids != UNDEFINED:
                    for entity_id in entity_ids:
                        yield entity_id

def estimate(self, txn, filters):
    resulting_filters = self._compatible_filters(filters)
    if resulting_filters is False:
        return -1  # -1 signals that this index shouldn't be used
    total = 0
    for values in product(*resulting_filters):
        value = normalize_value(values)
        value_count_key = build_key(self._prefix, COUNT_SUFFIX, value)
        value_count_current = self._graph._get(txn, value_count_key)
        if value_count_current != UNDEFINED:
            total += int(value_count_current)
    return total  # note: this is an overestimation

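# Hedged sketch of what product(*resulting_filters) expands to in ids() and
# estimate(): one tuple per combination of candidate values, each of which is
# then normalized and looked up under its own counter key. The filter lists
# below are made up for illustration.
from itertools import product

resulting_filters = [['red', 'blue'], [1, 2]]
assert list(product(*resulting_filters)) == [
    ('red', 1), ('red', 2), ('blue', 1), ('blue', 2)
]
# estimate() sums the per-combination counters, so the result can only
# overestimate: entities rejected later by non-indexed filters are still counted.
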
def get_kind_ids(self, txn, kind):
    ENTITY_COUNTER = METADATA_VERTEX_COUNTER if kind == KIND_VERTEX else METADATA_EDGE_COUNTER
    METADATA_ID_LIST_PREFIX = METADATA_VERTEX_ID_LIST_PREFIX if kind == KIND_VERTEX else METADATA_EDGE_ID_LIST_PREFIX
    limit = int(self._graph._get(None, ENTITY_COUNTER)) // CHUNK_SIZE
    keys = [build_key(METADATA_ID_LIST_PREFIX, i) for i in range(0, limit + 1)]
    list_entity_ids = self._graph._bulk_get_lst(txn, keys)
    for entity_ids in list_entity_ids:
        if entity_ids != UNDEFINED:
            for entity_id in entity_ids:
                yield entity_id

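# Illustrative only: the chunk-key enumeration pattern used by get_kind_ids.
# demo_build_key is a stand-in that joins parts with '/' (consistent with how
# bulk_add parses entity keys above); the prefix, counter value, and
# CHUNK_SIZE are all assumptions made for this sketch.
def demo_build_key(*parts):
    return '/'.join(str(part) for part in parts)

CHUNK_SIZE = 1000  # assumed value
entity_counter = 2500
limit = entity_counter // CHUNK_SIZE
keys = [demo_build_key('some/prefix', i) for i in range(0, limit + 1)]
assert keys == ['some/prefix/0', 'some/prefix/1', 'some/prefix/2']
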
def add(self, txn, entity_id, data):
    if self._filter_length and not entity_match(self._filter_funcs, data):
        return
    value = normalize_value([data.get(field, None) for field in self._fields])
    # Get the current chunk idx for this value (NOTE: value is a string representing a list)
    value_count_key = build_key(self._prefix, COUNT_SUFFIX, value)
    value_count = 0
    value_count_current = self._graph._get(txn, value_count_key)
    if value_count_current != UNDEFINED:
        value_count = int(value_count_current)
    # Increment the entity count for this value
    self._graph._set(txn, value_count_key, value_count + 1)
    # First, add entity_id to the right chunk:
    chunk_idx = value_count // CHUNK_SIZE
    chunk_key = build_key(self._prefix, chunk_idx, value)
    self._graph._append_to_lst(txn, chunk_key, entity_id)
    # Record which chunk entity_id landed in
    # (we keep the chunk idx so that the entity id can be removed quickly
    # from its chunk when the entity is removed from the DB)
    self._graph._set(txn, build_key(self._prefix, entity_id), build_key(chunk_idx, value))

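# Quick sketch of the counter arithmetic in add(): the pre-increment counter
# value, floor-divided by CHUNK_SIZE, picks the chunk the new id lands in, so
# each chunk fills up to exactly CHUNK_SIZE entries before a new one starts.
# CHUNK_SIZE below is an assumed value, for illustration.
CHUNK_SIZE = 1000

for value_count in (0, 999, 1000, 2500):
    print(value_count, '->', value_count // CHUNK_SIZE)
# 0 -> 0, 999 -> 0, 1000 -> 1, 2500 -> 2
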
def test_update_edge_data(self):
    # Manually modifying backend
    from grapheekdb.backends.data.keys import KIND_EDGE, DATA_SUFFIX
    from grapheekdb.backends.data.keys import build_key
    txn = self.graph._transaction_begin()
    try:
        self.graph._remove(txn, build_key(KIND_EDGE, self.e1.get_id(), DATA_SUFFIX))
        self.graph._transaction_commit(txn)
    except Exception:
        self.graph._transaction_rollback(txn)
        raise
    exception_raised = False
    try:
        self.e1.foobar = 3
    except GrapheekDataException:
        exception_raised = True
    assert exception_raised

def test_remove_node(self):
    # Manually modifying backend
    from grapheekdb.backends.data.keys import KIND_VERTEX, OUT_EDGES_SUFFIX
    from grapheekdb.backends.data.keys import build_key
    txn = self.graph._transaction_begin()
    try:
        self.graph._remove(txn, build_key(KIND_VERTEX, self.n1.get_id(), OUT_EDGES_SUFFIX))
        self.graph._transaction_commit(txn)
    except Exception:
        self.graph._transaction_rollback(txn)
        raise
    exception_raised = False
    try:
        self.n1.remove()
    except GrapheekDataException:
        exception_raised = True
    assert exception_raised