def __init__(self, document: Document, filename: str = None, delete_on_close=False):
    """Open a SQLite connection for ``document``, creating a temp file if needed.

    Args:
        document: Stored on the instance as ``self.document``.
        filename: Path of the database file to connect to. When ``None`` a
            new temporary file with the ``.kddb`` suffix is created in the
            directory returned by ``KodexaPlatform.get_tempdir()``.
        delete_on_close: Stored on the instance as ``self.delete_on_close``;
            it is not acted upon in this method.

    After construction ``self.is_tmp`` tells whether the file was created
    here as a temporary file, and ``self.is_new`` is ``False`` only when an
    explicitly supplied ``filename`` already existed on disk.
    """
    import sqlite3

    self.document = document

    # Lookup tables, all starting out empty.
    self.node_types = {}
    self.node_type_id_by_name = {}
    self.feature_type_id_by_name = {}
    self.feature_type_names = {}
    self.delete_on_close = delete_on_close

    self.is_new = True
    if filename is not None:
        self.is_tmp = False
        if pathlib.Path(filename).exists():
            # At this point we need to load the db
            self.is_new = False
    else:
        import os

        from kodexa import KodexaPlatform

        handle, filename = tempfile.mkstemp(suffix='.kddb', dir=KodexaPlatform.get_tempdir())
        # mkstemp hands back an already-open OS-level descriptor. sqlite3
        # opens the file by name, so the descriptor is never used; close it
        # here, otherwise every temporary database leaks one descriptor.
        os.close(handle)
        self.is_tmp = True

    self.current_filename = filename
    self.connection = sqlite3.connect(filename)
    self.cursor = self.connection.cursor()

    # Connection tuning: no rollback journal, temporary storage kept in
    # memory, and a large upper bound for memory-mapped I/O.
    self.cursor.execute("PRAGMA journal_mode=OFF")
    self.cursor.execute("pragma temp_store = memory")
    self.cursor.execute("pragma mmap_size = 30000000000")
def __init__(self, *args, **kwargs):
    """Initialise the store and make sure its backing directory exists.

    Missing ``slug``, ``type`` and ``name`` keyword arguments are filled in
    with defaults before everything is forwarded to the parent initializer.
    If ``self.store_path`` is ``None`` afterwards, a fresh temporary
    directory is created and used instead.

    Keyword Args:
        force_initialize: When truthy and the store path already exists, the
            path is removed with ``shutil.rmtree`` before being recreated.

    Raises:
        Exception: If the store path points to a regular file.
    """
    fallbacks = (
        ('slug', 'local'),
        ('type', 'DOCUMENT'),
        ('name', 'Local Document Store'),
    )
    for key, value in fallbacks:
        kwargs.setdefault(key, value)

    super().__init__(*args, **kwargs)

    if self.store_path is None:
        from kodexa import KodexaPlatform

        temp_root = KodexaPlatform.get_tempdir()
        self.store_path = tempfile.mkdtemp(dir=temp_root)
        logger.info(
            f"Creating new local model store in {self.store_path} since no path was provided"
        )

    store_dir = Path(self.store_path)
    reset_requested = kwargs.get('force_initialize', False)

    # NOTE(review): the removal below runs before the is_file() check, so a
    # file path combined with force_initialize would presumably fail inside
    # rmtree rather than with the message raised further down -- confirm
    # whether that ordering is intended.
    if reset_requested and store_dir.exists():
        shutil.rmtree(self.store_path)

    if store_dir.is_file():
        raise Exception(
            "Unable to load store, since it is pointing to a file?")

    if not store_dir.exists():
        store_dir.mkdir(parents=True)
def get_source(document):
    """Fetch the content referenced by ``document.source`` as a binary stream.

    Args:
        document: Object exposing ``source.original_path`` (a string path or
            URL) and ``source.headers`` (a mapping of header name to value,
            or a falsy value when there are none).

    Returns:
        An ``io.BytesIO`` holding the response body when the path starts with
        ``http``; otherwise a file object opened in ``'rb'`` mode on a
        temporary copy of the retrieved content.
    """
    source = document.source

    # If we have an http URL then we should use requests, it is much
    # cleaner
    if source.original_path.startswith('http'):
        response = requests.get(source.original_path, headers=source.headers)
        return io.BytesIO(response.content)

    if source.headers:
        opener = urllib.request.build_opener()
        # Build the complete header list in one go. Assigning addheaders
        # inside a per-header loop would overwrite the previous entry each
        # time and leave only the last header in effect.
        opener.addheaders = [(name, source.headers[name]) for name in source.headers]
        # install_opener replaces the process-wide default opener that
        # urlretrieve uses below.
        urllib.request.install_opener(opener)

    from kodexa import KodexaPlatform

    with tempfile.NamedTemporaryFile(
            delete=True, dir=KodexaPlatform.get_tempdir()) as tmp_file:
        urllib.request.urlretrieve(source.original_path, tmp_file.name)
        # NOTE(review): the handle is opened before the context manager
        # closes (and thereby deletes) the temporary file; this presumably
        # relies on the platform keeping an open handle readable after the
        # file is unlinked -- confirm for non-POSIX platforms.
        return open(tmp_file.name, 'rb')
def __init__(self, *args, **kwargs):
    """Initialise the local document store and load its metastore.

    Missing ``slug``, ``type``, ``name`` and ``store_ref`` keyword arguments
    are filled in with defaults, then the keyword arguments are passed to
    the parent initializer. If ``self.store_path`` is ``None`` afterwards, a
    temporary directory is created and an empty metastore is written to it.
    The store directory is created when absent (again with an empty
    metastore), and finally the metastore is read back.

    Keyword Args:
        force_initialize: When truthy and the store path already exists, the
            path is removed with ``shutil.rmtree`` before being recreated.

    Raises:
        Exception: If the store path points to a regular file.
    """
    fallbacks = {
        'slug': 'local',
        'type': 'DOCUMENT',
        'name': 'Local Document Store',
        'store_ref': 'local/local',
    }
    for key, value in fallbacks.items():
        kwargs.setdefault(key, value)

    # Only keyword arguments are forwarded; positional ones are accepted by
    # the signature but not passed on.
    super().__init__(**kwargs)

    if self.store_path is None:
        from kodexa import KodexaPlatform

        temp_root = KodexaPlatform.get_tempdir()
        self.store_path = tempfile.mkdtemp(dir=temp_root)
        logger.info(
            f"Creating new local document store in {self.store_path} since no path was provided"
        )

        # Create an empty index file
        self.metastore = []
        self.write_metastore()

    self.index = 0
    self.metastore: List[DocumentFamily] = []
    self.listeners: List = []

    store_dir = Path(self.store_path)
    reset_requested = kwargs.get('force_initialize', False)

    if reset_requested and store_dir.exists():
        shutil.rmtree(self.store_path)

    if store_dir.is_file():
        raise Exception(
            "Unable to load store, since it is pointing to a file?")

    if not store_dir.exists():
        logger.info(
            f"Creating new local document store in {self.store_path}")
        store_dir.mkdir(parents=True)

        # Create an empty index file
        self.metastore = []
        self.write_metastore()

    self.read_metastore()

    logger.info(
        f"Found {len(self.metastore)} documents in {self.store_path}")