def _new_request_file(request: flask.Request) -> str:
    """
    Process an incoming Flask request and return the path
    to a temporary request file.
    """
    request_files = request.files
    if request_files:
        # Multipart PUT request:
        # We expect two files: a JSON file "body"
        # and a binary file "user_code".
        # "body" contains the generator request.
        request_storage = request_files.get('body')
        request_dict = json.load(request_storage.stream)
        # "user_code" is a ZIP archive with the user code.
        user_code_storage = request_files.get('user_code')
        _, user_code_path = new_temp_file(suffix=user_code_storage.filename)
        user_code_storage.save(user_code_path)
        print(f' * User code file: {user_code_path}')
    else:
        # Not a multipart PUT request:
        # Expect the JSON request in the body content.
        request_dict = request.json
        user_code_path = None

    if not isinstance(request_dict, dict):
        print(f'Error: received invalid request: {request_dict}')
        raise werkzeug.exceptions.BadRequest('request data must be JSON')

    try:
        CubeGeneratorRequest.get_schema().validate_instance(request_dict)
    except jsonschema.ValidationError as e:
        print(f'Error: received invalid request: {request_dict}')
        raise werkzeug.exceptions.BadRequest(
            'request is not a valid cube generator request') from e

    if user_code_path:
        # If we have user code, alter the "code_config"
        # part of the generator request so it points to
        # the temporary ZIP archive.
        code_config = request_dict.get('code_config')
        if isinstance(code_config, dict):
            file_set = code_config.get('file_set')
            if isinstance(file_set, dict):
                file_set['path'] = user_code_path

    # Write the request to a temporary file.
    _, request_path = new_temp_file(suffix='_request.yaml')
    with open(request_path, 'w') as stream:
        yaml.dump(request_dict, stream)
    print(f' * Request: {request_path}')
    return request_path

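# Usage sketch (assumption, not taken from the original source): a minimal
# Flask endpoint that could feed requests into _new_request_file(). The
# route '/cubegens', the app object, and the JSON response shape are
# illustrative only.

import flask

app = flask.Flask(__name__)


@app.route('/cubegens', methods=['PUT'])
def put_cube_generator_request():
    request_path = _new_request_file(flask.request)
    # A real service would now hand request_path over to the generator.
    return flask.jsonify({'request_file': request_path})
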
def test_output(self):
    _, output_path = new_temp_file(suffix='.json')
    result = self.invoke_cli(['versions', '-o', output_path])
    self.assertEqual(0, result.exit_code)
    with open(output_path) as fp:
        text = fp.read()
    self.assertJson(text)

    _, output_path = new_temp_file(suffix='.yml')
    result = self.invoke_cli(['versions', '-o', output_path])
    self.assertEqual(0, result.exit_code)
    with open(output_path) as fp:
        text = fp.read()
    self.assertYaml(text)

def test_new_temp_file(self):
    fd, file_path = new_temp_file()
    self.assertIsInstance(fd, int)
    self.assertIsInstance(file_path, str)
    self.assertTrue(os.path.basename(file_path).startswith('xcube-'))
    self.assertTrue(os.path.isabs(file_path))
    self.assertTrue(os.path.isfile(file_path))

    fd, file_path = new_temp_file(prefix='bibo.', suffix='.zip')
    self.assertIsInstance(fd, int)
    self.assertIsInstance(file_path, str)
    self.assertTrue(os.path.basename(file_path).startswith('bibo.'))
    self.assertTrue(os.path.basename(file_path).endswith('.zip'))
    self.assertTrue(os.path.isabs(file_path))
    self.assertTrue(os.path.isfile(file_path))

def _init_local_store():
    """
    Initialize a "directory" data store with test datasets.
    """
    from xcube.core.new import new_cube

    local_base_dir = new_temp_dir(suffix='_local_store')
    dataset_1 = new_cube(width=36, height=18, variables={'A': 0.1, 'B': 0.2})
    dataset_2 = new_cube(width=36, height=18, variables={'C': 0.2, 'D': 0.3})
    dataset_1.to_zarr(os.path.join(local_base_dir, 'DATASET-1.zarr'))
    dataset_2.to_zarr(os.path.join(local_base_dir, 'DATASET-2.zarr'))

    global STORES_CONFIG_PATH
    _, STORES_CONFIG_PATH = new_temp_file(suffix='_stores.yaml')
    with open(STORES_CONFIG_PATH, 'w') as stream:
        yaml.dump(
            {
                'test': {
                    'title': 'Local test store',
                    'store_id': 'file',
                    'store_params': {
                        'root': local_base_dir
                    }
                }
            },
            stream
        )

    print(f' * Store base directory: {local_base_dir}')
    print(f' * Store configuration: {STORES_CONFIG_PATH}')

def to_local(self) -> 'FileSet':
    """
    Turn this file set into an existing, local file set.
    """
    if self.is_local():
        return self

    details = self._get_details()
    fs, root = details.fs, details.root

    url_path = fsspec.core.strip_protocol(self.path)
    suffix = ''
    for suffix in reversed(url_path.split('/')):
        if suffix != '':
            break

    if root.endswith('/'):
        temp_dir = new_temp_dir(prefix=TEMP_FILE_PREFIX, suffix=suffix)
        # TODO: replace by loop so we can apply includes/excludes
        #   before downloading actual files. See impl of fs.get().
        fs.get(root, temp_dir + "/", recursive=True)
        return FileSet(temp_dir,
                       sub_path=self.sub_path,
                       includes=self.includes,
                       excludes=self.excludes)
    else:
        _, temp_file = new_temp_file(prefix=TEMP_FILE_PREFIX, suffix=suffix)
        fs.get_file(root, temp_file)
        return FileSet(temp_file,
                       sub_path=self.sub_path,
                       includes=self.includes,
                       excludes=self.excludes)

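# Usage sketch (assumption): turning a remote FileSet into a local copy via
# to_local(). The S3 URL and the include/exclude patterns are placeholder
# values for illustration; only the FileSet(...) keywords and to_local()
# come from the code above.

def _file_set_to_local_example() -> 'FileSet':
    remote_code = FileSet('s3://some-bucket/user-code/',
                          includes=['*.py'],
                          excludes=['__pycache__/*'])
    # Downloads the remote files into a temporary local directory.
    return remote_code.to_local()
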
def write_data(self,
               data: xr.Dataset,
               data_id: str,
               replace=False,
               **write_params) -> str:
    assert_instance(data, xr.Dataset, name='data')
    assert_instance(data_id, str, name='data_id')
    fs, root, write_params = self.load_fs(write_params)
    if not replace and fs.exists(data_id):
        raise DataStoreError(f'Data resource {data_id} already exists')

    # This doesn't yet work as expected with fsspec and netcdf:
    # engine = write_params.pop('engine', 'scipy')
    # with fs.open(data_id, 'wb') as file:
    #     data.to_netcdf(file, engine=engine, **write_params)

    is_local = is_local_fs(fs)
    if is_local:
        file_path = data_id
    else:
        _, file_path = new_temp_file(suffix='.nc')
    engine = write_params.pop('engine', 'netcdf4')
    data.to_netcdf(file_path, engine=engine, **write_params)
    if not is_local:
        fs.put_file(file_path, data_id)
    return data_id

def open_data(self,
              data_id: str,
              **open_params) -> gpd.GeoDataFrame:
    # TODO: implement me correctly,
    #   this is not valid for shapefile AND geojson
    fs, root, open_params = self.load_fs(open_params)
    is_local = is_local_fs(fs)
    if is_local:
        file_path = data_id
    else:
        _, file_path = new_temp_file()
        fs.get_file(data_id, file_path)
    return gpd.read_file(file_path,
                         driver=self.get_driver_name(),
                         **open_params)

def write_data(self,
               data: gpd.GeoDataFrame,
               data_id: str,
               **write_params) -> str:
    # TODO: implement me correctly,
    #   this is not valid for shapefile AND geojson
    assert_instance(data, (gpd.GeoDataFrame, pd.DataFrame), 'data')
    fs, root, write_params = self.load_fs(write_params)
    is_local = is_local_fs(fs)
    if is_local:
        file_path = data_id
    else:
        _, file_path = new_temp_file()
    data.to_file(file_path, driver=self.get_driver_name(), **write_params)
    if not is_local:
        fs.put_file(file_path, data_id)
    return data_id

def _write_local_zip(self, zip_path: Optional[str]) -> 'FileSet':
    """Write file set into local ZIP archive."""
    local_dir_path = self.get_local_path()
    if not zip_path:
        _, zip_path = new_temp_file(prefix=TEMP_FILE_PREFIX, suffix='.zip')
    sub_path = _normalize_sub_path(self.sub_path)
    with zipfile.ZipFile(zip_path, 'w') as zip_file:
        for key in self.keys():
            file_path = os.path.join(local_dir_path, key)
            zip_file.write(file_path,
                           arcname=_strip_sub_path_from_key(key, sub_path))
    return FileSet(zip_path, sub_path=self.sub_path)

def open_data(self,
              data_id: str,
              **open_params) -> xr.Dataset:
    assert_instance(data_id, str, name='data_id')
    fs, root, open_params = self.load_fs(open_params)

    # This doesn't yet work as expected with fsspec and netcdf:
    # engine = open_params.pop('engine', 'scipy')
    # with fs.open(data_id, 'rb') as file:
    #     return xr.open_dataset(file, engine=engine, **open_params)

    is_local = is_local_fs(fs)
    if is_local:
        file_path = data_id
    else:
        _, file_path = new_temp_file(suffix='.nc')
        fs.get_file(data_id, file_path)
    engine = open_params.pop('engine', 'netcdf4')
    return xr.open_dataset(file_path, engine=engine, **open_params)

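# Usage sketch (illustrative, not from the original source): a write/open
# round trip that exercises the NetCDF write_data()/open_data() methods
# above through xcube's public new_data_store() factory. The store id
# 'file' and new_cube() follow the test setup shown earlier; the data id
# 'cube.nc' and the temporary root directory are assumptions.

def _netcdf_round_trip_example() -> xr.Dataset:
    from xcube.core.new import new_cube
    from xcube.core.store import new_data_store

    root = new_temp_dir(suffix='_nc_store')
    store = new_data_store('file', root=root)
    cube = new_cube(width=36, height=18, variables={'A': 0.1})
    data_id = store.write_data(cube, 'cube.nc', replace=True)
    return store.open_data(data_id)
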
def _new_result_file() -> str:
    _, file_path = new_temp_file(prefix='xcube-gen2-', suffix='-result.json')
    return file_path