def __next__(self):
    """Return a document for the next file and advance the iterator.

    When ``self.unpack`` is truthy the file is loaded with
    ``Document.from_kdxa``. Otherwise a new ``Document`` is created whose
    metadata records the source path, the connector name, the guessed MIME
    type and the connector options (``path`` and ``file_filter``), and whose
    ``source`` is filled in with the original filename, path and connector.

    Returns:
        The document for the file at the current position.

    Raises:
        StopIteration: once ``self.index`` has moved past the last entry of
            ``self.files``.
    """
    if self.index >= len(self.files):
        raise StopIteration

    # Advance first, then read the entry we just stepped over.
    self.index += 1
    current_file = self.files[self.index - 1]

    if self.unpack:
        return Document.from_kdxa(current_file)

    connector_name = self.get_name()
    # NOTE(review): mimetypes.guess_type returns a (type, encoding) tuple, so
    # the whole tuple is stored under "mime_type" - confirm that is intended.
    guessed_mime = mimetypes.guess_type(current_file)
    metadata = DocumentMetadata({
        "source_path": current_file,
        "connector": connector_name,
        "mime_type": guessed_mime,
        "connector_options": {
            "path": self.path,
            "file_filter": self.file_filter,
        },
    })
    document = Document(metadata)

    source = document.source
    source.original_filename = os.path.basename(current_file)
    source.original_path = self.path
    source.connector = self.get_name()

    # TODO we need to get the checksum and last_updated and created times
    return document
def load_kdxa(self, path: str):
    """Load a KDXA file and add the resulting document to this store.

    Args:
        path (str): Location passed to ``Document.from_kdxa``.

    Returns:
        None. The loaded document is handed to ``self.put`` keyed by its
        ``uuid``.
    """
    loaded = Document.from_kdxa(path)
    self.put(loaded.uuid, loaded)
def get_document_by_content_object(
        self, document_family: DocumentFamily,
        content_object: ContentObject) -> Document:
    """Load the document stored for a content object.

    The document is read from ``<store_path>/<content_object.id>.kdxa``.

    Args:
        document_family (DocumentFamily): The document family. Not used by
            this implementation.
        content_object (ContentObject): The content object whose ``id``
            names the KDXA file.

    Returns:
        The Kodexa document loaded from the content object's KDXA file.
    """
    kdxa_path = os.path.join(self.store_path, content_object.id) + ".kdxa"
    return Document.from_kdxa(kdxa_path)
def get_by_path(self, path: str) -> Optional[Document]:
    """Return the latest document in the family at the given path.

    The families in ``self.metastore`` are scanned in order and the first
    one whose ``path`` equals ``path`` is used. Its latest content object
    is loaded from ``<store_path>/<id>.kdxa``.

    Args:
        path (str): The family path to look up.

    Returns:
        The loaded document, or None when no family has that path.
    """
    matching_family = next(
        (candidate for candidate in self.metastore if candidate.path == path),
        None)
    if matching_family is None:
        return None

    latest_id = matching_family.get_latest_content().id
    kdxa_path = os.path.join(self.store_path, latest_id) + ".kdxa"
    return Document.from_kdxa(kdxa_path)
def get_by_uuid(self, uuid: str) -> Optional[Document]:
    """Return the document whose content object has the given id.

    Every content object of every family in ``self.metastore`` is checked
    in order. The first one whose ``id`` equals ``uuid`` is loaded from
    ``<store_path>/<id>.kdxa``.

    Args:
        uuid (str): The content object id to look up.

    Returns:
        The loaded document, or None when no content object matches.
    """
    found = next(
        (candidate
         for family in self.metastore
         for candidate in family.content_objects
         if candidate.id == uuid),
        None)
    if found is None:
        return None

    kdxa_path = os.path.join(self.store_path, found.id) + ".kdxa"
    return Document.from_kdxa(kdxa_path)