Exemple #1
0
    def __next__(self):
        """Return the document for the next entry of ``self.files``.

        ``self.index`` is advanced by one on every call that does not raise
        ``StopIteration``. When ``self.unpack`` is truthy the entry is loaded
        with ``Document.from_kdxa``; otherwise a new ``Document`` is created
        whose metadata and source fields describe the entry.

        Returns:
          The document built or loaded for the current entry.

        Raises:
          StopIteration: when ``self.index`` has moved past the last entry
            of ``self.files``.

        """
        if self.index >= len(self.files):
            raise StopIteration

        # Advance first, then read the entry that was just consumed.
        self.index += 1
        current_file = self.files[self.index - 1]

        if self.unpack:
            return Document.from_kdxa(current_file)

        # guess_type() returns a (type, encoding) pair; only the first element
        # is the MIME type (None when it cannot be guessed), so store that
        # rather than the whole tuple.
        mime_type, _encoding = mimetypes.guess_type(current_file)
        connector_name = self.get_name()

        document = Document(
            DocumentMetadata({
                "source_path": current_file,
                "connector": connector_name,
                "mime_type": mime_type,
                "connector_options": {
                    "path": self.path,
                    "file_filter": self.file_filter
                }
            }))
        document.source.original_filename = os.path.basename(current_file)
        document.source.original_path = self.path
        document.source.connector = connector_name

        # TODO we need to get the checksum and last_updated and created times
        return document
Exemple #2
0
    def load_kdxa(self, path: str):
        """Load a document with ``Document.from_kdxa`` and store it.

        The loaded document is handed to ``self.put`` using the document's
        own ``uuid`` as the key.

        Args:
          path (str): Location passed to ``Document.from_kdxa``.

        Returns:
          None

        """
        loaded = Document.from_kdxa(path)
        self.put(loaded.uuid, loaded)
Exemple #3
0
    def get_document_by_content_object(
            self, document_family: DocumentFamily,
            content_object: ContentObject) -> Document:
        """Load the document stored for a content object.

        The file read is ``<store_path>/<content_object.id>.kdxa``.

        Args:
          document_family (DocumentFamily): The document family; not used by
            this implementation.
          content_object (ContentObject): The content object whose ``id``
            names the file to load.

        Returns:
          The document returned by ``Document.from_kdxa`` for that file.

        """
        stem = os.path.join(self.store_path, content_object.id)
        return Document.from_kdxa(stem + ".kdxa")
Exemple #4
0
    def get_by_path(self, path: str) -> Optional[Document]:
        """Load the latest content of the first family stored at ``path``.

        Args:
          path (str): Value compared against each family's ``path``.

        Returns:
          The document loaded from ``<store_path>/<id>.kdxa``, where ``id``
          comes from the matching family's ``get_latest_content()``, or None
          when no family in ``self.metastore`` has that path.

        """
        # Only the first family whose path matches is considered.
        match = next(
            (candidate for candidate in self.metastore
             if candidate.path == path), None)
        if match is None:
            return None

        stem = os.path.join(self.store_path, match.get_latest_content().id)
        return Document.from_kdxa(stem + ".kdxa")
Exemple #5
0
    def get_by_uuid(self, uuid: str) -> Optional[Document]:
        """Load the document whose content object ``id`` equals ``uuid``.

        Families in ``self.metastore`` are scanned in order, and within each
        family its ``content_objects`` in order; the first match wins.

        Args:
          uuid (str): Value compared against each content object's ``id``.

        Returns:
          The document loaded from ``<store_path>/<id>.kdxa`` for the first
          matching content object, or None when nothing matches.

        """
        matches = (
            candidate
            for family in self.metastore
            for candidate in family.content_objects
            if candidate.id == uuid
        )
        found = next(matches, None)
        if found is None:
            return None

        stem = os.path.join(self.store_path, found.id)
        return Document.from_kdxa(stem + ".kdxa")