コード例 #1
0
ファイル: persistence.py プロジェクト: kodexa-ai/kodexa
    def __init__(self, document: Document, filename: str = None, delete_on_close=False):
        """Open (or create) the SQLite database file backing a document.

        Args:
            document: The document this instance is attached to.
            filename: Path of the database file. When None, a temporary
                ``.kddb`` file is created in the directory returned by
                ``KodexaPlatform.get_tempdir()``.
            delete_on_close: Stored as ``self.delete_on_close``; this
                constructor does not act on it.
        """
        import os
        import sqlite3

        self.document = document

        # Lookup tables; all start out empty.
        self.node_types = {}
        self.node_type_id_by_name = {}
        self.feature_type_id_by_name = {}
        self.feature_type_names = {}
        self.delete_on_close = delete_on_close

        self.is_new = True
        if filename is not None:
            self.is_tmp = False
            # An existing file means there is already a database to load.
            if pathlib.Path(filename).exists():
                self.is_new = False
        else:
            from kodexa import KodexaPlatform
            tmp_fd, filename = tempfile.mkstemp(suffix='.kddb', dir=KodexaPlatform.get_tempdir())
            # mkstemp returns an already-open OS-level descriptor. SQLite opens
            # the file by name below, so close the descriptor now instead of
            # leaking one per temporary database.
            os.close(tmp_fd)
            self.is_tmp = True

        self.current_filename = filename

        self.connection = sqlite3.connect(filename)
        self.cursor = self.connection.cursor()
        # Connection tuning: no rollback journal, in-memory temp storage and
        # a large memory-map limit.
        self.cursor.execute("PRAGMA journal_mode=OFF")
        self.cursor.execute("pragma temp_store = memory")
        self.cursor.execute("pragma mmap_size = 30000000000")
コード例 #2
0
ファイル: local.py プロジェクト: kodexa-ai/kodexa
    def __init__(self, *args, **kwargs):
        """Initialise a directory-backed local store.

        Fills in default ``slug``, ``type`` and ``name`` keyword arguments
        when the caller did not supply them, delegates to the parent
        constructor, and then makes sure ``self.store_path`` refers to a
        directory on disk.

        Args:
            *args: Forwarded unchanged to the parent constructor.
            **kwargs: Forwarded to the parent constructor. A truthy
                ``force_initialize`` entry removes an existing store
                directory before it is recreated.

        Raises:
            Exception: If the store path points to a regular file.
        """
        kwargs.setdefault('slug', 'local')
        kwargs.setdefault('type', 'DOCUMENT')
        kwargs.setdefault('name', 'Local Document Store')
        super().__init__(*args, **kwargs)

        if self.store_path is None:
            from kodexa import KodexaPlatform
            temp_root = KodexaPlatform.get_tempdir()
            self.store_path = tempfile.mkdtemp(dir=temp_root)
            logger.info(f"Creating new local model store in {self.store_path} since no path was provided")

        store_dir = Path(self.store_path)
        wipe_requested = kwargs.get('force_initialize', False)

        # Start from a clean slate when the caller asked for it.
        if wipe_requested and store_dir.exists():
            shutil.rmtree(self.store_path)

        if store_dir.is_file():
            raise Exception("Unable to load store, since it is pointing to a file?")

        if not store_dir.exists():
            store_dir.mkdir(parents=True)
コード例 #3
0
    def get_source(document):
        """Open the content behind a document's original path as a binary stream.

        Args:
          document: Object whose ``source.original_path`` is the URL to read
            and whose ``source.headers`` is an optional mapping of request
            header names to values.

        Returns:
          An ``io.BytesIO`` holding the response body when the path starts
          with ``http``; otherwise a file object opened in ``'rb'`` mode on
          a temporary download of the URL.

        """

        # If we have an http URL then we should use requests, it is much
        # cleaner
        if document.source.original_path.startswith('http'):
            response = requests.get(document.source.original_path,
                                    headers=document.source.headers)
            return io.BytesIO(response.content)

        if document.source.headers:
            opener = urllib.request.build_opener()
            # Build the complete header list in one go. Assigning inside a
            # per-header loop would overwrite the list each time and leave
            # only the last header in place.
            opener.addheaders = [
                (name, document.source.headers[name])
                for name in document.source.headers
            ]
            urllib.request.install_opener(opener)
        from kodexa import KodexaPlatform
        with tempfile.NamedTemporaryFile(
                delete=True, dir=KodexaPlatform.get_tempdir()) as tmp_file:
            urllib.request.urlretrieve(document.source.original_path,
                                       tmp_file.name)

            # NOTE(review): the temporary file is created with delete=True, so
            # it is removed once this ``with`` block exits and the handle
            # returned here outlives its directory entry. Re-opening a named
            # temporary file by name is platform dependent - confirm this is
            # acceptable on every supported platform.
            return open(tmp_file.name, 'rb')
コード例 #4
0
ファイル: local.py プロジェクト: kodexa-ai/kodexa
    def __init__(self, *args, **kwargs):
        """Initialise a directory-backed local document store.

        Supplies default ``slug``, ``type``, ``name`` and ``store_ref``
        keyword arguments when they are missing, delegates to the parent
        constructor, makes sure the store directory exists (writing an empty
        metastore for a freshly created one) and finally reads the metastore.

        Args:
            *args: Accepted but not forwarded to the parent constructor.
            **kwargs: Forwarded to the parent constructor. A truthy
                ``force_initialize`` entry removes an existing store
                directory before it is recreated.

        Raises:
            Exception: If the store path points to a regular file.
        """
        fallbacks = (
            ('slug', 'local'),
            ('type', 'DOCUMENT'),
            ('name', 'Local Document Store'),
            ('store_ref', 'local/local'),
        )
        for key, fallback in fallbacks:
            kwargs.setdefault(key, fallback)
        super().__init__(**kwargs)

        if self.store_path is None:
            from kodexa import KodexaPlatform
            temp_root = KodexaPlatform.get_tempdir()
            self.store_path = tempfile.mkdtemp(dir=temp_root)
            logger.info(f"Creating new local document store in {self.store_path} since no path was provided")

            # Seed the new temporary store with an empty index file.
            self.metastore = []
            self.write_metastore()

        self.index = 0
        self.metastore: List[DocumentFamily] = []
        self.listeners: List = []

        store_dir = Path(self.store_path)
        wipe_requested = kwargs.get('force_initialize', False)

        # Start from a clean slate when the caller asked for it.
        if wipe_requested and store_dir.exists():
            shutil.rmtree(self.store_path)

        if store_dir.is_file():
            raise Exception("Unable to load store, since it is pointing to a file?")

        if not store_dir.exists():
            logger.info(f"Creating new local document store in {self.store_path}")
            store_dir.mkdir(parents=True)

            # A brand-new directory gets an empty index file.
            self.metastore = []
            self.write_metastore()

        self.read_metastore()

        logger.info(f"Found {len(self.metastore)} documents in {self.store_path}")