def test_fetch_file_local_with_protocol():
    """
    Test function :func:`weaver.utils.fetch_file` when the reference is a pre-fetched local file.

    The same temporary JSON file is fetched twice into a result directory: once referenced by its plain
    path and once with the ``file://`` prefix. In both cases the result must exist and hold the same JSON.
    """
    tmp_dir = tempfile.gettempdir()
    with tempfile.NamedTemporaryFile(dir=tmp_dir, mode="w", suffix=".json") as tmp_json:
        tmp_data = {"message": "fetch-file-protocol"}
        tmp_json.write(json.dumps(tmp_data))
        tmp_json.seek(0)
        tmp_name = os.path.split(tmp_json.name)[-1]
        # result directory is named after this test function
        res_dir = os.path.join(tmp_dir, inspect.currentframe().f_code.co_name)
        res_path = os.path.join(res_dir, tmp_name)
        try:
            make_dirs(res_dir, exist_ok=True)
            for protocol in ["", "file://"]:
                tmp_path = protocol + tmp_json.name
                fetch_file(tmp_path, res_dir)
                assert os.path.isfile(res_path), "File [{}] should be accessible under [{}]".format(tmp_path, res_path)
                # read through a context manager so the handle is closed before cleanup
                with open(res_path) as res_file:
                    res_data = json.load(res_file)
                assert res_data == tmp_data, "File should be properly copied/referenced from original"
        finally:
            shutil.rmtree(res_dir, ignore_errors=True)
def run(self,
        runtimeContext,     # type: RuntimeContext
        tmpdir_lock=None,   # type: Optional[ThreadLock]
        ):                  # type: (...) -> None
    """
    Prepare the execution environment variables and delegate to :meth:`execute`.

    The temporary directory is created, the job environment is completed with the preserved variables from
    ``os.environ`` (without overriding already defined ones), ``HOME``/``TMPDIR`` are enforced, and
    ``PATH``/``SYSTEMROOT`` are inherited when missing. The initial work directory is relinked when a
    ``generatemapper`` is set. The ``tmpdir_lock`` argument is not used by this implementation.
    """

    def _native(value):
        # On Windows, subprocess env can't handle unicode.
        return str(value) if onWindows() else value

    make_dirs(self.tmpdir, exist_ok=True)
    env = self.environment

    preserved = runtimeContext.preserve_environment
    if runtimeContext.preserve_entire_environment:
        preserved = os.environ
    if preserved is not None:
        for name, raw in os.environ.items():
            # only inherit requested variables, and never override one the job already defines
            if name not in preserved or name in env:
                continue
            env[name] = _native(raw)

    env["HOME"] = _native(self.outdir)
    env["TMPDIR"] = _native(self.tmpdir)
    if "PATH" not in env:
        env["PATH"] = _native(os.environ["PATH"])
    if "SYSTEMROOT" in os.environ and "SYSTEMROOT" not in env:
        env["SYSTEMROOT"] = _native(os.environ["SYSTEMROOT"])

    # stageFiles(self.pathmapper, ignoreWritable=True, symLink=True, secret_store=runtimeContext.secret_store)
    if self.generatemapper:
        # FIXME: see if this is needed... func doesn't exist anymore in cwltool 2.x
        # stageFiles(self.generatemapper, ignoreWritable=self.inplace_update,
        #            symLink=True, secret_store=runtimeContext.secret_store)
        relink_initialworkdir(
            self.generatemapper,
            self.outdir,
            self.builder.outdir,
            inplace_update=self.inplace_update,
        )

    self.execute([], env, runtimeContext)
def test_fetch_file_remote_with_request():
    """
    Test function :func:`weaver.utils.fetch_file` when the reference is an URL.

    Also validates retries of the failing request.
    The mocked request answers with an HTTP 5xx error first, and only returns the file on the second call.
    """
    tmp_dir = tempfile.gettempdir()
    with contextlib.ExitStack() as stack:
        tmp_json = stack.enter_context(tempfile.NamedTemporaryFile(dir=tmp_dir, mode="w", suffix=".json"))
        tmp_data = {"message": "fetch-file-request"}
        tmp_json.write(json.dumps(tmp_data))
        tmp_json.seek(0)
        tmp_name = os.path.split(tmp_json.name)[-1]
        tmp_http = "http://weaver.mock" + tmp_json.name
        tmp_retry = 2

        # share in below mocked_request, 'nonlocal' back compatible with Python 2
        tmp = {"retry": tmp_retry, "json": tmp_json, "http": tmp_http}

        def mocked_request(*args, **kwargs):  # noqa: E811
            tmp["retry"] -= 1
            if not tmp["retry"]:
                return mocked_file_response(tmp["json"].name, tmp["http"])
            resp = HTTPInternalServerError()  # internal retry expect at least a 5xx code to retry
            return resp  # will be available on next call (to test retries)

        stack.enter_context(mock.patch("requests.request", side_effect=mocked_request))
        stack.enter_context(mock.patch("requests.sessions.Session.request", side_effect=mocked_request))
        m_request = stack.enter_context(mock.patch("requests.Session.request", side_effect=mocked_request))

        # result directory is named after this test function
        res_dir = os.path.join(tmp_dir, inspect.currentframe().f_code.co_name)
        res_path = os.path.join(res_dir, tmp_name)
        try:
            make_dirs(res_dir, exist_ok=True)
            fetch_file(tmp_http, res_dir, retry=tmp_retry + 1)
            assert os.path.isfile(res_path), "File [{}] should be accessible under [{}]".format(tmp_http, res_path)
            assert m_request.call_count == 2, "Request method should have been called twice because of retries"
            # read through a context manager so the handle is closed before cleanup
            with open(res_path) as res_file:
                res_data = json.load(res_file)
            assert res_data == tmp_data, "File should be properly generated from HTTP reference"
        finally:
            shutil.rmtree(res_dir, ignore_errors=True)
def test_fetch_file_local_links():
    """
    Test handling of symbolic links by function :func:`weaver.utils.fetch_file` for local files.

    Each case is a tuple of ``(source path, 'link' argument, whether the result must be a link)``, where the
    source is either the real temporary file or a symbolic link pointing to it.
    """
    tmp_dir = tempfile.gettempdir()
    src_dir = os.path.join(tmp_dir, str(uuid.uuid4()))
    dst_dir = os.path.join(tmp_dir, str(uuid.uuid4()))
    try:
        make_dirs(src_dir, exist_ok=True)
        make_dirs(dst_dir, exist_ok=True)
        with tempfile.NamedTemporaryFile(dir=src_dir, mode="w", suffix=".json") as tmp_json:
            tmp_data = {"message": "fetch-file-link"}
            tmp_json.write(json.dumps(tmp_data))
            tmp_json.seek(0)
            tmp_file = tmp_json.name
            tmp_path, tmp_name = os.path.split(tmp_file)
            tmp_link = os.path.join(tmp_path, "link.json")
            os.symlink(tmp_file, tmp_link)
            dst_path = os.path.join(dst_dir, tmp_name)
            for src_path, as_link, result_link in [
                (tmp_file, True, True),
                (tmp_file, False, False),
                (tmp_file, None, False),
                (tmp_link, True, True),
                (tmp_link, False, False),
                (tmp_link, None, True),
            ]:
                # start each case from a clean destination
                if os.path.exists(dst_path):
                    os.remove(dst_path)
                fetch_file(src_path, dst_dir, link=as_link)
                assert os.path.isfile(dst_path), (
                    f"File [{tmp_file}] should be accessible under [{dst_path}]. "
                    f"Failed with: {(src_path, as_link, result_link)}"
                )
                if result_link:
                    assert os.path.islink(dst_path), "Result is not a link when it is expected to be one."
                else:
                    assert not os.path.islink(dst_path), "Result is a link when it is expected not to be one."
                # read through a context manager so the handle is closed before the next removal
                with open(dst_path) as dst_file:
                    dst_data = json.load(dst_file)
                assert dst_data == tmp_data, "File should be properly copied/referenced from original"
    except OSError as exc:
        pytest.fail(f"Unexpected error raised during test: [{exc!s}]")
    finally:
        shutil.rmtree(src_dir, ignore_errors=True)
        shutil.rmtree(dst_dir, ignore_errors=True)
def setup_logger(cls):
    """
    Configure the logger of the test class, writing to both a log file and the terminal.

    Nothing is done when ``cls.logger_enabled`` is false. Otherwise, the log file is named after the class
    and placed under ``cls.logger_result_dir``, and separator strings are defined on the class.
    """
    if not cls.logger_enabled:
        return

    if not isinstance(cls.logger_level, int):
        cls.logger_level = logging.getLevelName(cls.logger_level)

    make_dirs(cls.logger_result_dir, exist_ok=True)
    log_name = cls.__name__ + ".log"
    log_path = os.path.abspath(os.path.join(cls.logger_result_dir, log_name))

    # only message to avoid 'log-name INFO' offsetting outputs
    formatter = logging.Formatter("%(message)s")
    handlers = [logging.FileHandler(log_path), logging.StreamHandler()]
    for handler in handlers:
        handler.setFormatter(formatter)

    width = 80
    cls.logger_separator_calls = "-" * width + "\n"   # used between function calls (of same request)
    cls.logger_separator_steps = "=" * width + "\n"   # used between overall test steps (between requests)
    cls.logger_separator_tests = "*" * width + "\n"   # used between various test runs (each test_* method)
    cls.logger_separator_cases = "#" * width + "\n"   # used between various TestCase runs

    cls.logger = logging.getLogger(cls.__name__)
    cls.logger.setLevel(cls.logger_level)
    for handler in handlers:
        cls.logger.addHandler(handler)
def load_pywps_config(container, config=None):
    # type: (AnySettingsContainer, Optional[Union[str, Dict[str, str]]]) -> ConfigParser
    """
    Loads and updates the PyWPS configuration using Weaver settings.

    The configuration is resolved only once. Following calls return the preloaded configuration as long as
    the ``weaver.wps_configured`` setting is set.

    :param container: any container from which the application settings can be retrieved.
    :param config:
        Additional configuration applied when provided as a dictionary of ``{'section.key': 'value'}``.
        Only the first ``.`` separates the section from the option name.
    :returns: the updated PyWPS configuration.
    :raises botocore.exceptions.ClientError:
        when an S3 bucket is requested and its creation fails for a reason other than already existing.
    """
    settings = get_settings(container)
    if settings.get("weaver.wps_configured"):
        LOGGER.debug("Using preloaded internal Weaver WPS configuration.")
        return pywps_config.CONFIG

    LOGGER.info("Initial load of internal Weaver WPS configuration.")
    pywps_config.load_configuration([])  # load defaults
    pywps_config.CONFIG.set("logging", "db_echo", "false")
    if logging.getLevelName(pywps_config.CONFIG.get("logging", "level")) <= logging.DEBUG:
        pywps_config.CONFIG.set("logging", "level", "INFO")

    # update metadata
    LOGGER.debug("Updating WPS metadata configuration.")
    for setting_name, setting_value in settings.items():
        if setting_name.startswith("weaver.wps_metadata"):
            pywps_setting = setting_name.replace("weaver.wps_metadata_", "")
            pywps_config.CONFIG.set("metadata:main", pywps_setting, setting_value)
    # add weaver configuration keyword if not already provided
    wps_keywords = pywps_config.CONFIG.get("metadata:main", "identification_keywords")
    weaver_mode = get_weaver_configuration(settings)
    if weaver_mode not in wps_keywords:
        wps_keywords += ("," if wps_keywords else "") + weaver_mode
        pywps_config.CONFIG.set("metadata:main", "identification_keywords", wps_keywords)

    # add additional config passed as dictionary of {'section.key': 'value'}
    if isinstance(config, dict):
        for key, value in config.items():
            # split only on the first separator so that option names containing '.' remain valid
            section, option = key.split(".", 1)
            pywps_config.CONFIG.set(section, option, value)
        # cleanup alternative dict "PYWPS_CFG" which is not expected elsewhere
        if isinstance(settings.get("PYWPS_CFG"), dict):
            del settings["PYWPS_CFG"]

    # set accepted languages aligned with values provided by REST API endpoints
    # otherwise, execute request could fail due to languages considered not supported
    languages = ", ".join(AcceptLanguage.values())
    LOGGER.debug("Setting WPS languages: [%s]", languages)
    pywps_config.CONFIG.set("server", "language", languages)

    LOGGER.debug("Updating WPS output configuration.")
    # find output directory from app config or wps config
    if "weaver.wps_output_dir" not in settings:
        output_dir = pywps_config.get_config_value("server", "outputpath")
        settings["weaver.wps_output_dir"] = output_dir
    # ensure the output dir exists if specified
    output_dir = get_wps_output_dir(settings)
    make_dirs(output_dir, exist_ok=True)
    # find output url from app config (path/url) or wps config (url only)
    # note: needs to be configured even when using S3 bucket since XML status is provided locally
    if "weaver.wps_output_url" not in settings:
        output_path = settings.get("weaver.wps_output_path", "").rstrip("/")
        if output_path and isinstance(output_path, str):
            output_url = os.path.join(get_weaver_url(settings), output_path.strip("/"))
        else:
            output_url = pywps_config.get_config_value("server", "outputurl")
        settings["weaver.wps_output_url"] = output_url
    # apply workdir if provided, otherwise use default
    if "weaver.wps_workdir" in settings:
        make_dirs(settings["weaver.wps_workdir"], exist_ok=True)
        pywps_config.CONFIG.set("server", "workdir", settings["weaver.wps_workdir"])

    # configure S3 bucket if requested, storage of all process outputs
    # note:
    #   credentials and default profile are picked up automatically by 'boto3' from local AWS configs or env vars
    #   region can also be picked from there unless explicitly provided by weaver config
    # warning:
    #   if we set `(server, storagetype, s3)`, ALL status (including XML) are stored to S3
    #   to preserve status locally, we set 'file' and override the storage instance during output rewrite in WpsPackage
    #   we can still make use of the server configurations here to make this overridden storage auto-find its configs
    s3_bucket = settings.get("weaver.wps_output_s3_bucket")
    pywps_config.CONFIG.set("server", "storagetype", "file")
    # pywps_config.CONFIG.set("server", "storagetype", "s3")
    if s3_bucket:
        LOGGER.debug("Updating WPS S3 bucket configuration.")
        import boto3
        from botocore.exceptions import ClientError
        s3 = boto3.client("s3")
        s3_region = settings.get("weaver.wps_output_s3_region", s3.meta.region_name)
        LOGGER.info("Validating that S3 [Bucket=%s, Region=%s] exists or creating it.", s3_bucket, s3_region)
        try:
            s3.create_bucket(Bucket=s3_bucket, CreateBucketConfiguration={"LocationConstraint": s3_region})
            LOGGER.info("S3 bucket for WPS output created.")
        except ClientError as exc:
            # 'BucketAlreadyOwnedByYou' is the code reported by S3 when the bucket exists and belongs to the caller
            s3_error_code = exc.response.get("Error", {}).get("Code")
            if s3_error_code not in ("BucketAlreadyExists", "BucketAlreadyOwnedByYou"):
                LOGGER.error("Failed setup of S3 bucket for WPS output: [%s]", exc)
                raise
            LOGGER.info("S3 bucket for WPS output already exists.")
        pywps_config.CONFIG.set("s3", "region", s3_region)
        pywps_config.CONFIG.set("s3", "bucket", s3_bucket)
        pywps_config.CONFIG.set("s3", "public", "false")  # don't automatically push results as publicly accessible
        pywps_config.CONFIG.set("s3", "encrypt", "true")  # encrypts data server-side, transparent from this side

    # enforce back resolved values onto PyWPS config
    pywps_config.CONFIG.set("server", "setworkdir", "true")
    pywps_config.CONFIG.set("server", "sethomedir", "true")
    pywps_config.CONFIG.set("server", "outputpath", settings["weaver.wps_output_dir"])
    pywps_config.CONFIG.set("server", "outputurl", settings["weaver.wps_output_url"])
    pywps_config.CONFIG.set("server", "url", get_wps_url(settings, load=False))
    settings["weaver.wps_configured"] = True
    return pywps_config.CONFIG
def test_fetch_file_http_content_disposition_filename():
    """
    Test the file name resolved by :func:`weaver.utils.fetch_file` from the ``Content-Disposition`` header.

    Each case provides the expected name of the fetched file and the headers returned by the mocked URL.
    When no usable file name is expected from the header, the last fragment of the URL path is expected.
    """
    tmp_dir = tempfile.gettempdir()
    with contextlib.ExitStack() as stack:
        tmp_json = stack.enter_context(tempfile.NamedTemporaryFile(dir=tmp_dir, mode="w", suffix=".json"))  # noqa
        tmp_data = {"message": "fetch-file-request"}
        tmp_text = json.dumps(tmp_data)
        tmp_json.write(tmp_text)
        tmp_json.seek(0)

        tmp_random = "123456"
        tmp_normal = "spécial.json"
        tmp_escape = quote(tmp_normal)  # % characters
        tmp_name = os.path.split(tmp_json.name)[-1]
        tmp_http = f"http://weaver.mock/{tmp_random}"  # pseudo endpoint where file name is not directly visible

        def mock_response(__request, test_headers):
            # work on a copy to leave the test case definition untouched
            resp_headers = dict(test_headers)
            resp_headers.update({
                "Content-Type": ContentType.APP_JSON,
                "Content-Length": str(len(tmp_text))
            })
            return 200, resp_headers, tmp_text

        res_dir = os.path.join(tmp_dir, str(uuid.uuid4()))
        req_mock = stack.enter_context(responses.RequestsMock())
        try:
            make_dirs(res_dir, exist_ok=True)
            for target, headers in [
                (tmp_name, {
                    "Content-Disposition": f"attachment; filename=\"{tmp_name}\";filename*=UTF-8''{tmp_name}"
                }),
                (tmp_name, {  # unusual spacing/order does not matter
                    "Content-Disposition": f" filename*=UTF-8''{tmp_name}; filename=\"{tmp_name}\";attachment;"
                }),
                (tmp_name, {
                    "Content-Disposition": f"attachment; filename=\"{tmp_name}\""
                }),
                (tmp_name, {
                    "Content-Disposition": f"attachment; filename={tmp_name}"
                }),
                (tmp_normal, {
                    "Content-Disposition": f"attachment; filename=\"{tmp_normal}\";filename*=UTF-8''{tmp_escape}"
                }),
                (tmp_normal, {  # disallowed escape character in 'filename', but 'filename*' is valid and used first
                    "Content-Disposition": f"attachment; filename=\"{tmp_escape}\";filename*=UTF-8''{tmp_normal}"
                }),
                (tmp_random, {  # disallowed escape character in 'filename', reject since no alternative
                    "Content-Disposition": f"attachment; filename=\"{tmp_escape}\""
                }),
                (tmp_random, {  # empty header
                    "Content-Disposition": ""
                }),
                (tmp_random, {  # missing header
                }),
                (tmp_random, {  # missing filename
                    "Content-Disposition": "attachment"
                }),
                (tmp_random, {  # invalid filename
                    "Content-Disposition": "attachment; filename*=UTF-8''exec%20'echo%20test'"
                }),
                (tmp_random, {  # invalid encoding
                    "Content-Disposition": "attachment; filename*=random''%47%4F%4F%44.json"
                }),
                ("GOOD.json", {  # valid encoding and allowed characters after escape
                    "Content-Disposition": "attachment; filename*=UTF-8''%47%4F%4F%44.json"
                })
            ]:
                req_mock.remove("GET", tmp_http)  # reset previous iter
                # bind the current headers as default to avoid late-binding of the loop variable
                req_mock.add_callback(
                    "GET", tmp_http,
                    callback=lambda req, hdrs=headers: mock_response(req, hdrs)
                )
                try:
                    res_path = fetch_file(tmp_http, res_dir)
                except Exception as exc:
                    raise AssertionError(
                        f"Unexpected exception when testing with: [{headers}]. Exception: [{exc}]"
                    ) from exc
                assert res_path == os.path.join(res_dir, target), f"Not expected name when testing with: [{headers}]"
                assert os.path.isfile(res_path), f"File [{tmp_http}] should be accessible under [{res_path}]"
                # read through a context manager so the handle is closed before cleanup
                with open(res_path) as res_file:
                    res_data = json.load(res_file)
                assert res_data == tmp_data, "File should be properly generated from HTTP reference"
        finally:
            shutil.rmtree(res_dir, ignore_errors=True)