Beispiel #1
0
    def _reindex_records(self):
        """Rebuild the search indexes for every record in the collection.

        - records themselves are left unchanged
        - for each doc type in ``self.doc_types`` an existing index of the
          same name is dropped and a new one is created from the doc type's
          ``index_mapping``
        - every record in the records db is then indexed again, together
          with its generated link content
        """
        for doc_type in self.doc_types:
            index_name = return_index_name(self._collection_id,
                                           doc_type.__name__)
            # overwrite indices of same name if they exist
            if self.client.index_exists(index_name):
                self.client.drop_index(index_name)
            self.client.new_index(index_name=index_name,
                                  mapping=doc_type.index_mapping)

        # the index of the records DataFrame holds the keys used to fetch
        # each record
        for record_key in self.records_db.df.index:
            record = self.return_record(record_key)
            doc_type = record["document_type"]
            link_content = self.gen_link_content(record["document_id"])
            # the doc type decides which id the record is indexed under
            indexed_id, record_index = doc_type.gen_search_index(
                record, link_content=link_content)
            index_name = return_index_name(self._collection_id,
                                           doc_type_str=doc_type.__name__)
            self.client.index_record(
                document_id=indexed_id,
                record_index=record_index,
                index_name=index_name,
            )
Beispiel #2
0
    def register(self, records_db_path=None, config_path=None, write=True):
        """Register the collection and fix the locations of its assets.

        Registering (followed by ``write``) is how an "in-memory" collection
        becomes persistent: the temporary search indexes are renamed to names
        derived from the collection id, and a record holding the asset paths
        and index names is added to the register.

        Paths left as ``None`` default to ``records.pkl`` / ``config.json``
        inside the collection's folder under ``COLLECTIONS_FOLDER``.

        Raises:
            KeyError: if the collection id already exists in the register.
        """
        already_taken = self._register.exists(self.collection_id)
        if already_taken:
            raise KeyError(
                f"collection name {self.collection_id} already taken.")

        # fall back to the default asset locations
        if records_db_path is None:
            records_db_path = os.path.join(
                COLLECTIONS_FOLDER, self.collection_id, "records.pkl")
        if config_path is None:
            config_path = os.path.join(
                COLLECTIONS_FOLDER, self.collection_id, "config.json")

        # move every temporary index over to its collection-based name
        search_indices = []
        for doc_type in self.doc_types:
            type_name = doc_type.__name__
            temp_name = return_index_name(self._collection_id, type_name)
            final_name = return_index_name(self.collection_id, type_name)
            self.client.rename_index(index_name=temp_name,
                                     new_index_name=final_name)
            search_indices.append(final_name)

        # store the registration record and mirror its fields on the instance
        entry = {
            "collection_id": self.collection_id,
            "records_db_path": records_db_path,
            "config_path": config_path,
            "search_indices": search_indices,
        }
        self._register.add(entry)
        for attr_name, attr_value in entry.items():
            setattr(self, attr_name, attr_value)
        self.registered = True

        if write:
            self.write()
Beispiel #3
0
    def remove_record(self, document_id):
        """Delete the record for a (resolved) document_id from the collection.

        The record is removed from the records db, which is written back to
        ``self.records_db_path`` when the collection is registered, and then
        deleted from the search index of its document type.
        """
        # fetch the record before removing it: its doc type is needed to
        # find the search index to delete from
        removed = self.return_record(document_id)
        self.records_db.rm_record(document_id)
        if self.registered:
            self.records_db.write(self.records_db_path)

        type_name = removed["document_type"].__name__
        target_index = return_index_name(self._collection_id,
                                         doc_type_str=type_name)
        self.client.delete_record(document_id, index_name=target_index)
Beispiel #4
0
 def add_record(self, record, index_linked_content, write=True):
     """Add a record to the collection and index it for search.

     The record is stored in the records db. The db is written to
     ``self.records_db_path`` only when the collection is registered and
     ``write`` is true. The record is then indexed in the search index of
     its document type.

     Args:
         record: mapping providing ``"document_type"`` (an object exposing
             ``gen_search_index`` and ``__name__``) and, when linked
             content is indexed, ``"document_id"``.
         index_linked_content: if truthy, link content generated by
             ``self.gen_link_content`` is passed to the indexer;
             otherwise ``None`` is passed.
         write: pass False to skip writing the records db to disk, e.g.
             when the caller persists it separately.
     """
     self.records_db.update_record(record)
     # honour the caller's choice: only a registered collection has a
     # records_db_path, and the caller may opt out of the write
     if self.registered and write:
         self.records_db.write(self.records_db_path)

     doc_type = record["document_type"]
     link_content = (self.gen_link_content(record["document_id"])
                     if index_linked_content else None)
     document_id, record_index = doc_type.gen_search_index(
         record, link_content=link_content)
     index_name = return_index_name(self._collection_id,
                                    doc_type_str=doc_type.__name__)
     self.client.index_record(document_id=document_id,
                              record_index=record_index,
                              index_name=index_name)
Beispiel #5
0
    def new(cls, collection_id, doc_types):
        """Build a fresh, unregistered collection for the given doc types.

        Creates a records db from the combined doc type schemas and one
        search index per doc type, named from the ``tmp_``-prefixed
        collection id. Until the collection is registered these assets are
        in-memory/overwritable; an existing index of the same name is
        replaced.

        Raises:
            KeyError: if the collection id already exists in the register.
        """
        if cls._register.exists(collection_id):
            raise KeyError(f"collection name {collection_id} already taken.")

        # records db: fold the schema of every doc type into one
        merged_schema = {}
        for doc_type in doc_types:
            merged_schema = update_dict(merged_schema, doc_type.schema)
        records_db = Data.new(merged_schema)

        # temporary search indexes, one per doc type
        temp_collection_id = f"tmp_{collection_id}"
        search_indices = []
        for doc_type in doc_types:
            temp_index = return_index_name(temp_collection_id,
                                           doc_type.__name__)
            search_indices.append(temp_index)
            # an index left over under the same name is replaced
            if cls.client.index_exists(temp_index):
                cls.client.drop_index(temp_index)
            cls.client.new_index(index_name=temp_index,
                                 mapping=doc_type.index_mapping)

        type_names = [doc_type.__name__ for doc_type in doc_types]
        configd = {"doc_types": type_names}
        coll = cls(
            collection_id=collection_id,
            doc_types=doc_types,
            records_db=records_db,
            configd=configd,
        )
        coll.search_indices = search_indices
        return coll