Ejemplo n.º 1
0
def test_fetch_file_local_with_protocol():
    """
    Test function :func:`weaver.utils.fetch_file` when the reference is a pre-fetched local file.

    The same temporary JSON file is fetched once using its plain path and once using
    the ``file://`` prefix. After each fetch, the file expected under the result
    directory must exist and load back to the original JSON data.
    """
    tmp_dir = tempfile.gettempdir()
    with tempfile.NamedTemporaryFile(dir=tmp_dir, mode="w",
                                     suffix=".json") as tmp_json:
        tmp_data = {"message": "fetch-file-protocol"}
        tmp_json.write(json.dumps(tmp_data))
        tmp_json.seek(0)
        tmp_name = os.path.split(tmp_json.name)[-1]
        # dedicated result directory named after this test function
        res_dir = os.path.join(tmp_dir, inspect.currentframe().f_code.co_name)
        res_path = os.path.join(res_dir, tmp_name)
        try:
            make_dirs(res_dir, exist_ok=True)
            for protocol in ["", "file://"]:
                tmp_path = protocol + tmp_json.name
                fetch_file(tmp_path, res_dir)
                assert os.path.isfile(
                    res_path
                ), "File [{}] should be accessible under [{}]".format(
                    tmp_path, res_path)
                # explicit context manager so the handle is closed before the next iteration and the cleanup
                with open(res_path) as res_file:
                    res_data = json.load(res_file)
                assert res_data == tmp_data, "File should be properly copied/referenced from original"
        finally:
            # always remove the result directory, whether the assertions passed or not
            shutil.rmtree(res_dir, ignore_errors=True)
Ejemplo n.º 2
0
    def run(self,
            runtimeContext,     # type: RuntimeContext
            tmpdir_lock=None,   # type: Optional[ThreadLock]
            ):                  # type: (...) -> None
        """
        Prepare the execution environment variables and trigger the job execution.

        The temporary directory is created, requested variables of the current process
        environment are copied into the job environment (without overriding values already
        defined there), ``HOME``, ``TMPDIR``, ``PATH`` and ``SYSTEMROOT`` are filled in, and
        :meth:`execute` is finally called with an empty list as first argument.

        :param runtimeContext: context providing ``preserve_environment`` and ``preserve_entire_environment``.
        :param tmpdir_lock: not used by this implementation.
        """

        def _native(value):
            # On Windows, subprocess env can't handle unicode.
            return str(value) if onWindows() else value

        make_dirs(self.tmpdir, exist_ok=True)
        env = self.environment

        # selection of variables to carry over; the whole environment takes precedence when requested
        preserved = runtimeContext.preserve_environment
        if runtimeContext.preserve_entire_environment:
            preserved = os.environ
        if preserved is not None:
            for name, value in os.environ.items():
                if name not in preserved or name in env:
                    continue
                env[name] = _native(value)

        env["HOME"] = _native(self.outdir)
        env["TMPDIR"] = _native(self.tmpdir)
        if "PATH" not in env:
            env["PATH"] = _native(os.environ["PATH"])
        if "SYSTEMROOT" not in env and "SYSTEMROOT" in os.environ:
            env["SYSTEMROOT"] = _native(os.environ["SYSTEMROOT"])

        # stageFiles(self.pathmapper, ignoreWritable=True, symLink=True, secret_store=runtimeContext.secret_store)
        if self.generatemapper:
            # FIXME: see if this is needed... func doesn't exist anymore in cwltool 2.x
            # stageFiles(self.generatemapper, ignoreWritable=self.inplace_update,
            #            symLink=True, secret_store=runtimeContext.secret_store)
            relink_initialworkdir(
                self.generatemapper,
                self.outdir,
                self.builder.outdir,
                inplace_update=self.inplace_update,
            )

        self.execute([], env, runtimeContext)
Ejemplo n.º 3
0
def test_fetch_file_remote_with_request():
    """
    Test function :func:`weaver.utils.fetch_file` when the reference is an URL.
    Also validates retries of the failing request.

    The mocked request returns an HTTP 5xx error on its first call and the file
    response on the second one, so exactly two calls are expected.
    """
    tmp_dir = tempfile.gettempdir()
    with contextlib.ExitStack() as stack:
        tmp_json = stack.enter_context(
            tempfile.NamedTemporaryFile(dir=tmp_dir, mode="w", suffix=".json"))
        tmp_data = {"message": "fetch-file-request"}
        tmp_json.write(json.dumps(tmp_data))
        tmp_json.seek(0)
        tmp_name = os.path.split(tmp_json.name)[-1]
        tmp_http = "http://weaver.mock" + tmp_json.name
        tmp_retry = 2

        # share in below mocked_request, 'nonlocal' back compatible with Python 2
        tmp = {"retry": tmp_retry, "json": tmp_json, "http": tmp_http}

        def mocked_request(*args, **kwargs):  # noqa: E811
            """
            Return a server error until the shared retry counter reaches zero, then the file response.
            """
            tmp["retry"] -= 1
            if not tmp["retry"]:
                return mocked_file_response(tmp["json"].name, tmp["http"])
            # internal retry expect at least a 5xx code to retry
            # file response will be available on next call (to test retries)
            return HTTPInternalServerError()

        stack.enter_context(
            mock.patch("requests.request", side_effect=mocked_request))
        stack.enter_context(
            mock.patch("requests.sessions.Session.request",
                       side_effect=mocked_request))
        m_request = stack.enter_context(
            mock.patch("requests.Session.request", side_effect=mocked_request))

        # dedicated result directory named after this test function
        res_dir = os.path.join(tmp_dir, inspect.currentframe().f_code.co_name)
        res_path = os.path.join(res_dir, tmp_name)
        try:
            make_dirs(res_dir, exist_ok=True)
            fetch_file(tmp_http, res_dir, retry=tmp_retry + 1)
            assert os.path.isfile(
                res_path), "File [{}] should be accessible under [{}]".format(
                    tmp_http, res_path)
            assert m_request.call_count == 2, "Request method should have been called twice because of retries"
            # explicit context manager so the handle is closed before the directory cleanup
            with open(res_path) as res_file:
                res_data = json.load(res_file)
            assert res_data == tmp_data, "File should be properly generated from HTTP reference"
        finally:
            # always remove the result directory, whether the assertions passed or not
            shutil.rmtree(res_dir, ignore_errors=True)
Ejemplo n.º 4
0
def test_fetch_file_local_links():
    """
    Test handling of symbolic links by function :func:`weaver.utils.fetch_file` for local files.

    A real file and a symbolic link pointing to it are each fetched with ``link`` set to
    ``True``, ``False`` and ``None``. For every combination, the result must exist, must
    be (or not be) a link as listed in the cases table, and must load to the original data.
    """
    tmp_dir = tempfile.gettempdir()
    src_dir = os.path.join(tmp_dir, str(uuid.uuid4()))
    dst_dir = os.path.join(tmp_dir, str(uuid.uuid4()))
    try:
        make_dirs(src_dir, exist_ok=True)
        make_dirs(dst_dir, exist_ok=True)
        with tempfile.NamedTemporaryFile(dir=src_dir, mode="w", suffix=".json") as tmp_json:
            tmp_data = {"message": "fetch-file-link"}
            tmp_json.write(json.dumps(tmp_data))
            tmp_json.seek(0)
            tmp_file = tmp_json.name
            tmp_path, tmp_name = os.path.split(tmp_file)
            tmp_link = os.path.join(tmp_path, "link.json")
            os.symlink(tmp_file, tmp_link)
            dst_path = os.path.join(dst_dir, tmp_name)
            # each case: (source reference, 'link' argument, whether the result is expected to be a link)
            for src_path, as_link, result_link in [
                (tmp_file, True, True),
                (tmp_file, False, False),
                (tmp_file, None, False),
                (tmp_link, True, True),
                (tmp_link, False, False),
                (tmp_link, None, True),
            ]:
                # start each case from a clean destination
                if os.path.exists(dst_path):
                    os.remove(dst_path)
                fetch_file(src_path, dst_dir, link=as_link)
                assert os.path.isfile(dst_path), (
                    f"File [{tmp_file}] should be accessible under [{dst_path}]. "
                    f"Failed with: {(src_path, as_link, result_link)}"
                )
                if result_link:
                    assert os.path.islink(dst_path), "Result is not a link when it is expected to be one."
                else:
                    assert not os.path.islink(dst_path), "Result is a link when it is expected not to be one."
                # explicit context manager so the handle is closed before the file is removed by the next case
                with open(dst_path) as dst_file:
                    dst_data = json.load(dst_file)
                assert dst_data == tmp_data, "File should be properly copied/referenced from original"
    except OSError as exc:
        pytest.fail(f"Unexpected error raised during test: [{exc!s}]")
    finally:
        shutil.rmtree(src_dir, ignore_errors=True)
        shutil.rmtree(dst_dir, ignore_errors=True)
Ejemplo n.º 5
0
 def setup_logger(cls):
     """
     Configure the logger of the class when ``cls.logger_enabled`` is set, otherwise do nothing.

     The logger is named after the class and receives two handlers using a message-only format:
     a file handler writing to ``<cls.logger_result_dir>/<class name>.log`` and a stream handler.
     Separator strings employed to delimit sections of the logged output are also defined on the class.
     """
     if not cls.logger_enabled:
         return

     # convert a level given by name to the value returned by 'logging.getLevelName'
     if not isinstance(cls.logger_level, int):
         cls.logger_level = logging.getLevelName(cls.logger_level)

     make_dirs(cls.logger_result_dir, exist_ok=True)
     log_name = cls.__name__ + ".log"
     log_path = os.path.abspath(os.path.join(cls.logger_result_dir, log_name))

     # only message to avoid 'log-name INFO' offsetting outputs
     formatter = logging.Formatter("%(message)s")
     handlers = [logging.FileHandler(log_path), logging.StreamHandler()]
     for handler in handlers:
         handler.setFormatter(formatter)

     width = 80
     # used between function calls (of same request)
     cls.logger_separator_calls = "-" * width + "\n"
     # used between overall test steps (between requests)
     cls.logger_separator_steps = "=" * width + "\n"
     # used between various test runs (each test_* method)
     cls.logger_separator_tests = "*" * width + "\n"
     # used between various TestCase runs
     cls.logger_separator_cases = "#" * width + "\n"

     cls.logger = logging.getLogger(cls.__name__)
     cls.logger.setLevel(cls.logger_level)
     for handler in handlers:
         cls.logger.addHandler(handler)
Ejemplo n.º 6
0
def load_pywps_config(container, config=None):
    # type: (AnySettingsContainer, Optional[Union[str, Dict[str, str]]]) -> ConfigParser
    """
    Loads and updates the PyWPS configuration using Weaver settings.

    When the settings are already flagged with ``weaver.wps_configured``, the current PyWPS
    configuration is returned as is. Otherwise, PyWPS defaults are loaded and then updated with
    metadata, languages, output directory/URL, work directory and optional S3 bucket definitions
    resolved from the settings, after which the settings are flagged as configured.

    :param container: any container from which the application settings can be retrieved.
    :param config:
        additional PyWPS options as dictionary of ``{"section.key": "value"}``.
        Only a dictionary is processed by this function, any other value is ignored.
    :returns: the updated global PyWPS configuration.
    :raises botocore.exceptions.ClientError:
        when the S3 bucket creation fails for a reason other than the bucket already existing.
    """
    settings = get_settings(container)
    if settings.get("weaver.wps_configured"):
        LOGGER.debug("Using preloaded internal Weaver WPS configuration.")
        return pywps_config.CONFIG

    LOGGER.info("Initial load of internal Weaver WPS configuration.")
    pywps_config.load_configuration([])  # load defaults
    pywps_config.CONFIG.set("logging", "db_echo", "false")
    # raise the PyWPS logging level to INFO when it is configured as DEBUG or lower
    if logging.getLevelName(pywps_config.CONFIG.get("logging",
                                                    "level")) <= logging.DEBUG:
        pywps_config.CONFIG.set("logging", "level", "INFO")

    # update metadata
    LOGGER.debug("Updating WPS metadata configuration.")
    for setting_name, setting_value in settings.items():
        if setting_name.startswith("weaver.wps_metadata"):
            pywps_setting = setting_name.replace("weaver.wps_metadata_", "")
            pywps_config.CONFIG.set("metadata:main", pywps_setting,
                                    setting_value)
    # add weaver configuration keyword if not already provided
    wps_keywords = pywps_config.CONFIG.get("metadata:main",
                                           "identification_keywords")
    weaver_mode = get_weaver_configuration(settings)
    if weaver_mode not in wps_keywords:
        wps_keywords += ("," if wps_keywords else "") + weaver_mode
        pywps_config.CONFIG.set("metadata:main", "identification_keywords",
                                wps_keywords)
    # add additional config passed as dictionary of {'section.key': 'value'}
    if isinstance(config, dict):
        for name, value in config.items():
            # split only on the first dot so that an option name containing dots is kept intact
            section, option = name.split(".", 1)
            pywps_config.CONFIG.set(section, option, value)
        # cleanup alternative dict "PYWPS_CFG" which is not expected elsewhere
        if isinstance(settings.get("PYWPS_CFG"), dict):
            del settings["PYWPS_CFG"]

    # set accepted languages aligned with values provided by REST API endpoints
    # otherwise, execute request could fail due to languages considered not supported
    languages = ", ".join(AcceptLanguage.values())
    LOGGER.debug("Setting WPS languages: [%s]", languages)
    pywps_config.CONFIG.set("server", "language", languages)

    LOGGER.debug("Updating WPS output configuration.")
    # find output directory from app config or wps config
    if "weaver.wps_output_dir" not in settings:
        output_dir = pywps_config.get_config_value("server", "outputpath")
        settings["weaver.wps_output_dir"] = output_dir
    # ensure the output dir exists if specified
    output_dir = get_wps_output_dir(settings)
    make_dirs(output_dir, exist_ok=True)
    # find output url from app config (path/url) or wps config (url only)
    # note: needs to be configured even when using S3 bucket since XML status is provided locally
    if "weaver.wps_output_url" not in settings:
        output_path = settings.get("weaver.wps_output_path", "")
        # validate the type before any string operation so that a non-string value falls back to the WPS config
        output_path = output_path.rstrip("/") if isinstance(output_path, str) else ""
        if output_path:
            output_url = os.path.join(get_weaver_url(settings),
                                      output_path.strip("/"))
        else:
            output_url = pywps_config.get_config_value("server", "outputurl")
        settings["weaver.wps_output_url"] = output_url
    # apply workdir if provided, otherwise use default
    if "weaver.wps_workdir" in settings:
        make_dirs(settings["weaver.wps_workdir"], exist_ok=True)
        pywps_config.CONFIG.set("server", "workdir",
                                settings["weaver.wps_workdir"])

    # configure S3 bucket if requested, storage of all process outputs
    # note:
    #   credentials and default profile are picked up automatically by 'boto3' from local AWS configs or env vars
    #   region can also be picked from there unless explicitly provided by weaver config
    # warning:
    #   if we set `(server, storagetype, s3)`, ALL status (including XML) are stored to S3
    #   to preserve status locally, we set 'file' and override the storage instance during output rewrite in WpsPackage
    #   we can still make use of the server configurations here to make this overridden storage auto-find its configs
    s3_bucket = settings.get("weaver.wps_output_s3_bucket")
    pywps_config.CONFIG.set("server", "storagetype", "file")
    # pywps_config.CONFIG.set("server", "storagetype", "s3")
    if s3_bucket:
        LOGGER.debug("Updating WPS S3 bucket configuration.")
        # local imports so that 'boto3' is only needed when an S3 bucket is configured
        import boto3
        from botocore.exceptions import ClientError
        s3 = boto3.client("s3")
        s3_region = settings.get("weaver.wps_output_s3_region",
                                 s3.meta.region_name)
        LOGGER.info(
            "Validating that S3 [Bucket=%s, Region=%s] exists or creating it.",
            s3_bucket, s3_region)
        try:
            s3.create_bucket(
                Bucket=s3_bucket,
                CreateBucketConfiguration={"LocationConstraint": s3_region})
            LOGGER.info("S3 bucket for WPS output created.")
        except ClientError as exc:
            # a bucket already owned by the caller is reported with its own error code by S3,
            # it must be accepted as well instead of failing the configuration
            error_code = exc.response.get("Error", {}).get("Code")
            if error_code not in ("BucketAlreadyExists", "BucketAlreadyOwnedByYou"):
                LOGGER.error("Failed setup of S3 bucket for WPS output: [%s]",
                             exc)
                raise
            LOGGER.info("S3 bucket for WPS output already exists.")
        pywps_config.CONFIG.set("s3", "region", s3_region)
        pywps_config.CONFIG.set("s3", "bucket", s3_bucket)
        # don't automatically push results as publicly accessible
        pywps_config.CONFIG.set("s3", "public", "false")
        # encrypts data server-side, transparent from this side
        pywps_config.CONFIG.set("s3", "encrypt", "true")

    # enforce back resolved values onto PyWPS config
    pywps_config.CONFIG.set("server", "setworkdir", "true")
    pywps_config.CONFIG.set("server", "sethomedir", "true")
    pywps_config.CONFIG.set("server", "outputpath",
                            settings["weaver.wps_output_dir"])
    pywps_config.CONFIG.set("server", "outputurl",
                            settings["weaver.wps_output_url"])
    pywps_config.CONFIG.set("server", "url", get_wps_url(settings, load=False))
    # flag the settings so that following calls reuse the configuration loaded here
    settings["weaver.wps_configured"] = True
    return pywps_config.CONFIG
Ejemplo n.º 7
0
def test_fetch_file_http_content_disposition_filename():
    """
    Test the file name selected by :func:`weaver.utils.fetch_file` according to ``Content-Disposition`` headers.

    The same mocked URL, whose last path segment does not expose the file name, is fetched with different
    response headers. Each case lists the file name expected in the result directory for the given headers.
    """
    tmp_dir = tempfile.gettempdir()
    with contextlib.ExitStack() as stack:
        tmp_json = stack.enter_context(tempfile.NamedTemporaryFile(dir=tmp_dir, mode="w", suffix=".json"))  # noqa
        tmp_data = {"message": "fetch-file-request"}
        tmp_text = json.dumps(tmp_data)
        tmp_json.write(tmp_text)
        tmp_json.seek(0)

        tmp_random = "123456"
        tmp_normal = "spécial.json"
        tmp_escape = quote(tmp_normal)  # % characters
        tmp_name = os.path.split(tmp_json.name)[-1]
        tmp_http = f"http://weaver.mock/{tmp_random}"  # pseudo endpoint where file name is not directly visible

        def mock_response(__request, test_headers):
            """
            Build the mocked response from the headers under test, completed with content type and length.
            """
            # work on a copy to leave the headers of the test case untouched
            resp_headers = dict(test_headers)
            resp_headers.update({
                "Content-Type": ContentType.APP_JSON,
                "Content-Length": str(len(tmp_text))
            })
            return 200, resp_headers, tmp_text

        res_dir = os.path.join(tmp_dir, str(uuid.uuid4()))
        req_mock = stack.enter_context(responses.RequestsMock())
        try:
            make_dirs(res_dir, exist_ok=True)
            # each case: (expected file name, response headers)
            for target, headers in [
                (tmp_name, {
                    "Content-Disposition": f"attachment; filename=\"{tmp_name}\";filename*=UTF-8''{tmp_name}"
                }),
                (tmp_name, {  # unusual spacing/order does not matter
                    "Content-Disposition": f" filename*=UTF-8''{tmp_name};   filename=\"{tmp_name}\";attachment;"
                }),
                (tmp_name, {
                    "Content-Disposition": f"attachment; filename=\"{tmp_name}\""
                }),
                (tmp_name, {
                    "Content-Disposition": f"attachment; filename={tmp_name}"
                }),
                (tmp_normal, {
                    "Content-Disposition": f"attachment; filename=\"{tmp_normal}\";filename*=UTF-8''{tmp_escape}"
                }),
                (tmp_normal, {  # disallowed escape character in 'filename', but 'filename*' is valid and used first
                    "Content-Disposition": f"attachment; filename=\"{tmp_escape}\";filename*=UTF-8''{tmp_normal}"
                }),
                (tmp_random, {  # disallowed escape character in 'filename', reject since no alternative
                    "Content-Disposition": f"attachment; filename=\"{tmp_escape}\""
                }),
                (tmp_random, {  # empty header
                    "Content-Disposition": ""
                }),
                (tmp_random, {  # missing header
                }),
                (tmp_random, {  # missing filename
                    "Content-Disposition": "attachment"
                }),
                (tmp_random, {  # invalid filename
                    "Content-Disposition": "attachment; filename*=UTF-8''exec%20'echo%20test'"
                }),
                (tmp_random, {  # invalid encoding
                    "Content-Disposition": "attachment; filename*=random''%47%4F%4F%44.json"
                }),
                ("GOOD.json", {  # valid encoding and allowed characters after escape
                    "Content-Disposition": "attachment; filename*=UTF-8''%47%4F%4F%44.json"
                })
            ]:
                req_mock.remove("GET", tmp_http)  # reset previous iter
                # bind the headers of this iteration explicitly instead of relying on the loop variable
                req_mock.add_callback(
                    "GET", tmp_http,
                    callback=lambda req, _headers=headers: mock_response(req, _headers)
                )
                try:
                    res_path = fetch_file(tmp_http, res_dir)
                except Exception as exc:
                    raise AssertionError(
                        f"Unexpected exception when testing with: [{headers}]. Exception: [{exc}]"
                    ) from exc
                assert res_path == os.path.join(res_dir, target), f"Not expected name when testing with: [{headers}]"
                assert os.path.isfile(res_path), f"File [{tmp_http}] should be accessible under [{res_path}]"
                # explicit context manager so the handle is closed before the next case and the cleanup
                with open(res_path) as res_file:
                    res_data = json.load(res_file)
                assert res_data == tmp_data, "File should be properly generated from HTTP reference"
        finally:
            # always remove the result directory, whether the assertions passed or not
            shutil.rmtree(res_dir, ignore_errors=True)