def _reindex_records(self):
    """Re-index all records in the collection.

    Records themselves are left unchanged. For every doc type in
    ``self.doc_types`` the existing search index (if any) is dropped and a
    new one is created from the doc type's ``index_mapping``. Each record in
    ``self.records_db`` is then indexed again into the index that belongs to
    its own document type.
    """
    # Wipe and recreate one search index per doc type.
    for doc_type in self.doc_types:
        index_name = return_index_name(self._collection_id, doc_type.__name__)
        # Drop any index of the same name so the new mapping starts clean.
        if self.client.index_exists(index_name):
            self.client.drop_index(index_name)
        self.client.new_index(index_name=index_name,
                              mapping=doc_type.index_mapping)

    # The index of the records dataframe holds the document ids.
    for stored_id in self.records_db.df.index:
        record = self.return_record(stored_id)
        doc_type = record["document_type"]
        link_content = self.gen_link_content(record["document_id"])
        document_id, record_index = doc_type.gen_search_index(
            record, link_content=link_content)
        index_name = return_index_name(self._collection_id,
                                       doc_type_str=doc_type.__name__)
        self.client.index_record(
            document_id=document_id,
            record_index=record_index,
            index_name=index_name,
        )
def register(self, records_db_path=None, config_path=None, write=True):
    """Register the collection and define where its assets live.

    Registering (followed by a write) turns an "in-memory" collection into a
    persistent one. The asset paths and the search index names are stored in
    a register record for this collection id.

    Args:
        records_db_path: path for the records db; defaults to
            ``records.pkl`` in the collection's folder under
            ``COLLECTIONS_FOLDER``.
        config_path: path for the config file; defaults to ``config.json``
            in the same folder.
        write: when truthy, ``self.write()`` is called after registration.

    Raises:
        KeyError: if the collection id is already present in the register.
    """
    if self._register.exists(self.collection_id):
        raise KeyError(
            f"collection name {self.collection_id} already taken.")

    if records_db_path is None:
        records_db_path = os.path.join(
            COLLECTIONS_FOLDER, self.collection_id, "records.pkl")
    if config_path is None:
        config_path = os.path.join(
            COLLECTIONS_FOLDER, self.collection_id, "config.json")

    # Move each temporary index over to its permanent, collection-based name.
    permanent_names = []
    for kind in self.doc_types:
        old_name = return_index_name(self._collection_id, kind.__name__)
        new_name = return_index_name(self.collection_id, kind.__name__)
        self.client.rename_index(index_name=old_name,
                                 new_index_name=new_name)
        permanent_names.append(new_name)

    # Store the register entry, then mirror its fields onto the instance.
    entry = {
        "collection_id": self.collection_id,
        "records_db_path": records_db_path,
        "config_path": config_path,
        "search_indices": permanent_names,
    }
    self._register.add(entry)
    for attr_name, attr_value in entry.items():
        setattr(self, attr_name, attr_value)
    self.registered = True

    if write:
        self.write()
def remove_record(self, document_id):
    """Delete the record for (resolved) document_id from the collection.

    The record is removed from the records db (which is written to disk when
    the collection is registered) and from the search index of its document
    type.
    """
    # Look the record up before removing it: its document type determines
    # which search index the entry has to be deleted from.
    removed = self.return_record(document_id)

    self.records_db.rm_record(document_id)
    if self.registered:
        self.records_db.write(self.records_db_path)

    kind = removed["document_type"]
    target_index = return_index_name(self._collection_id,
                                     doc_type_str=kind.__name__)
    self.client.delete_record(document_id, index_name=target_index)
def add_record(self, record, index_linked_content, write=True):
    """Add a record to the collection and to its search index.

    Args:
        record: the record to store; its "document_type" and "document_id"
            entries are read here.
        index_linked_content: when truthy, link content generated by
            ``self.gen_link_content`` is passed on to the search index
            generation; otherwise ``None`` is passed.
        write: when True (the default) and the collection is registered, the
            records db is written to ``self.records_db_path``. Pass False to
            skip the write for this call.
    """
    self.records_db.update_record(record)
    # Honour the ``write`` flag: previously it was accepted but ignored, so
    # every call on a registered collection rewrote the records db.
    if write and self.registered:
        self.records_db.write(self.records_db_path)

    doc_type = record["document_type"]
    if index_linked_content:
        link_content = self.gen_link_content(record["document_id"])
    else:
        link_content = None

    document_id, record_index = doc_type.gen_search_index(
        record, link_content=link_content)
    index_name = return_index_name(self._collection_id,
                                   doc_type_str=doc_type.__name__)
    self.client.index_record(document_id=document_id,
                             record_index=record_index,
                             index_name=index_name)
def new(cls, collection_id, doc_types):
    """Build a fresh, unregistered collection.

    Until the collection is registered its assets are in-memory/overwritable:
    the search indexes are created under a ``tmp_``-prefixed collection id
    and replace any index of the same name.

    Args:
        collection_id: id for the new collection.
        doc_types: iterable of doc types; each one's ``schema``,
            ``index_mapping`` and ``__name__`` are used here.

    Returns:
        The new collection, with ``search_indices`` set to the temporary
        index names.

    Raises:
        KeyError: if the collection id is already present in the register.
    """
    if cls._register.exists(collection_id):
        raise KeyError(f"collection name {collection_id} already taken.")

    # Fold the schemas of all doc types into one schema for the records db.
    merged_schema = {}
    for kind in doc_types:
        merged_schema = update_dict(merged_schema, kind.schema)
    db = Data.new(merged_schema)

    # Create the temporary search indexes, replacing same-named leftovers.
    tmp_id = f"tmp_{collection_id}"
    tmp_indices = []
    for kind in doc_types:
        tmp_name = return_index_name(tmp_id, kind.__name__)
        tmp_indices.append(tmp_name)
        if cls.client.index_exists(tmp_name):
            cls.client.drop_index(tmp_name)
        cls.client.new_index(index_name=tmp_name,
                             mapping=kind.index_mapping)

    config = {
        "doc_types": [kind.__name__ for kind in doc_types],
    }
    collection = cls(
        collection_id=collection_id,
        doc_types=doc_types,
        records_db=db,
        configd=config,
    )
    collection.search_indices = tmp_indices
    return collection