Ejemplo n.º 1
0
def write_workflow_attachment(run_id: str, run_request: RunRequest,
                              files: Dict[str, FileStorage]) -> None:
    """
    Materialise the workflow attachments of a run under its `exe_dir`.

    Two sources are handled:

    - Entries of `run_request["workflow_attachment"]` that carry both
      `file_name` and `file_url`, whose URL scheme is http(s) and whose URL
      does not start with this service's own endpoint, are downloaded and
      written to `exe_dir/<secured file_name>`.
    - When the `WORKFLOW_ATTACHMENT` config flag is truthy, every file
      uploaded under the `workflow_attachment[]` form field is saved to
      `exe_dir/<secured filename>`.

    Parent directories are created as needed.

    Raises whatever `requests.get` raises on connection failure or timeout.
    """
    # (connect, read) timeouts in seconds. Without a timeout a remote host
    # that never answers would block this request handler forever.
    download_timeout = (10, 60)

    exe_dir: Path = get_path(run_id, "exe_dir")

    host = request.host_url.strip("/")
    url_prefix = current_app.config['URL_PREFIX'].strip("/")
    endpoint = f"{host}/{url_prefix}".strip("/")
    for attachment in run_request["workflow_attachment"]:
        if "file_name" not in attachment or "file_url" not in attachment:
            continue
        file_name: str = attachment["file_name"]
        file_url: str = attachment["file_url"]
        parsed_url = parse.urlparse(file_url)
        if parsed_url.scheme not in ["http", "https"]:
            continue
        # URLs under our own endpoint are skipped here rather than fetched
        # from ourselves over HTTP.
        if file_url.startswith(endpoint):
            continue
        remote_dest = \
            exe_dir.joinpath(secure_filepath(file_name)).resolve()
        remote_dest.parent.mkdir(parents=True, exist_ok=True)
        # NOTE(review): the HTTP status is not checked, so an error page
        # returned by the remote host is written as the attachment content.
        # Confirm whether a failed download should abort the request.
        response: Response = requests.get(file_url,
                                          timeout=download_timeout)
        with remote_dest.open(mode="wb") as f:
            f.write(response.content)

    if current_app.config["WORKFLOW_ATTACHMENT"]:
        uploaded_files = \
            files.getlist("workflow_attachment[]")  # type: ignore
        for uploaded in uploaded_files:
            upload_name = secure_filepath(uploaded.filename)  # type: ignore
            upload_dest = exe_dir.joinpath(upload_name).resolve()
            upload_dest.parent.mkdir(parents=True, exist_ok=True)
            uploaded.save(upload_dest)  # type: ignore
Ejemplo n.º 2
0
def get_runs_id_data(run_id: str, subpath: str = "") -> Response:
    """
    This provides a remote url to download a file or directory under the
    `run_dir` of the sapporo-service.

    - In the case of `path/to/file`, this returns the file.
    - In the case of `path/to/dir`, this returns the list of files under
      directory in JSON format.
    - In the case of `path/to/dir?download=true`, this returns the directory
      in zip format.

    The path is relative to the base directory of each run.
    See `README.md` in sapporo-service for the structure of `run_dir`.
    For example, if you want to download the output `foo.txt`, specify
    something like `outputs/foo.txt`.

    `..` will be ignored.

    An empty `subpath` (the default) addresses the run directory itself.
    If the resolved path does not exist, the request is aborted with
    HTTP 400.
    """
    validate_run_id(run_id)
    leaf_name = Path(subpath).name
    # `secure_filepath` strips leading dots, so a hidden file such as
    # `.foo` is sanitised only up to its parent and the leaf is re-attached
    # verbatim. `startswith` (rather than indexing `[0]`) keeps an empty
    # `subpath` from raising IndexError, and `..` is excluded so that it is
    # sanitised away instead of being re-attached, which would escape the
    # run directory.
    if leaf_name.startswith(".") and leaf_name != "..":
        requested_path = \
            secure_filepath(str(Path(subpath).parent)
                            ).joinpath(leaf_name)
    else:
        requested_path = secure_filepath(subpath)
    path = get_run_dir(run_id).joinpath(requested_path)
    if not path.exists():
        parent = Path(f"runs/{run_id}/data").joinpath(requested_path.parent)
        abort(400,
              f"The specified path: {requested_path} does not exist. "
              f"Please make another request to `<endpoint>/{parent}/` again "
              "and check the dir structure.")
    if path.is_file():
        return send_file(path, as_attachment=True)
    else:
        if str2bool(request.args.get("download", False)):
            # The temporary file only supplies a unique base name; the
            # archive itself is written next to it as `<name>.zip`.
            with NamedTemporaryFile() as f:
                res = make_archive(f.name, "zip",
                                   root_dir=path.parent, base_dir=path.name)
                # Remember the archive on the request context `g`;
                # presumably a teardown hook elsewhere deletes these files.
                if "temp_files" not in g:
                    g.temp_files = [Path(f"{f.name}.zip")]
                else:
                    g.temp_files.append(Path(f"{f.name}.zip"))
                return send_file(res, as_attachment=True,
                                 attachment_filename=f"{path.name}.zip")
        else:
            response: Response = jsonify(path_hierarchy(path, path))
            response.status_code = GET_STATUS_CODE
            return response
Ejemplo n.º 3
0
def validate_and_update_run_request(run_id: str,
                                    run_request: RunRequest,
                                    files: Dict[str, FileStorage]) \
        -> RunRequest:
    """
    Validate the run request and fill in derived / default fields.

    Steps, in order:

    1. In `REGISTERED_ONLY_MODE`, reject requests that specify
       `workflow_url` (HTTP 400).
    2. If `workflow_name` is given, copy the workflow's url, type, type
       version and (unless already present) attachments from the workflow
       returned by `get_workflow`.
    3. Require `workflow_params`, `workflow_type`, `workflow_type_version`,
       `workflow_url` and `workflow_engine_name` (HTTP 400 if missing).
    4. Normalise `workflow_attachment` to a list (decoding a JSON string if
       needed) and default `workflow_engine_parameters` / `tags` to "{}".
    5. Derive `workflow_name` from `tags` or from the last path segment of
       `workflow_url` when it was not given.
    6. When the `WORKFLOW_ATTACHMENT` config flag is truthy, append one
       `{file_name, file_url}` entry per uploaded `workflow_attachment[]`
       file, pointing at this service's `runs/<run_id>/data/` endpoint.
    7. Validate the workflow type/version and check `workflow_url` and
       `workflow_engine_name` with `validate_meta_charactors`.

    Returns the same `run_request` object, updated in place.
    """
    if current_app.config["REGISTERED_ONLY_MODE"] and \
            "workflow_url" in run_request:
        abort(
            400, "Currently, sapporo-service is running with "
            "registered_only_mode. "
            "Therefore, you need to specify a workflow using "
            "`workflow_name` field. A list of executable workflows can "
            "be retrieved requesting `GET /service-info`")

    if "workflow_name" in run_request:
        wf: Workflow = get_workflow(run_request["workflow_name"])
        run_request["workflow_url"] = wf["workflow_url"]
        run_request["workflow_type"] = wf["workflow_type"]
        run_request["workflow_type_version"] = wf["workflow_type_version"]
        if "workflow_attachment" not in run_request:
            run_request["workflow_attachment"] = wf["workflow_attachment"]

    for field in [
            "workflow_params", "workflow_type", "workflow_type_version",
            "workflow_url", "workflow_engine_name"
    ]:
        if field not in run_request:
            abort(400,
                  f"{field} not included in the form data of the request.")

    if "workflow_attachment" in run_request:
        attachment = run_request["workflow_attachment"]
        if isinstance(attachment, str):  # type: ignore
            run_request["workflow_attachment"] = \
                json.loads(attachment)  # type: ignore
        elif isinstance(attachment, list):
            # Work on a copy: entries are appended below, and the list may
            # be the one owned by the registered workflow or by the caller.
            run_request["workflow_attachment"] = list(attachment)
    else:
        run_request["workflow_attachment"] = []
    if "workflow_engine_parameters" not in run_request:
        run_request["workflow_engine_parameters"] = "{}"
    if "tags" not in run_request:
        run_request["tags"] = "{}"

    if "workflow_name" not in run_request:
        tags = json.loads(run_request["tags"])
        if "workflow_name" in tags:
            run_request["workflow_name"] = tags["workflow_name"]
        else:
            # Fall back to the last path segment of the workflow URL.
            run_request["workflow_name"] = \
                parse.urlparse(run_request["workflow_url"]).path.split("/")[-1]

    if current_app.config["WORKFLOW_ATTACHMENT"]:
        workflow_attachment = \
            files.getlist("workflow_attachment[]")  # type: ignore
        exe_dir: Path = get_path(run_id, "exe_dir")
        host = request.host_url.strip("/")
        url_prefix = current_app.config['URL_PREFIX'].strip("/")
        endpoint = f"{host}/{url_prefix}".strip("/")
        base_remote_url = f"{endpoint}/runs/{run_id}/data/"
        for f in workflow_attachment:
            file_name: Path = secure_filepath(f.filename)
            file_path: Path = exe_dir.joinpath(file_name).resolve()
            # The URL path is taken relative to the parent of `exe_dir`,
            # so it includes the `exe_dir` directory name itself.
            run_request["workflow_attachment"].append({
                "file_name":
                str(file_name),
                "file_url":
                base_remote_url + str(file_path.relative_to(exe_dir.parent))
            })

    validate_wf_type(run_request["workflow_type"],
                     run_request["workflow_type_version"])

    validate_meta_charactors("workflow_url", run_request["workflow_url"])
    validate_meta_charactors("workflow_engine_name",
                             run_request["workflow_engine_name"])

    return run_request
Ejemplo n.º 4
0
def test_contain_space() -> None:
    """Spaces in a file name are expected to become underscores."""
    sanitized = secure_filepath("My cool movie.mov")
    expected = Path("My_cool_movie.mov")
    assert sanitized == expected
Ejemplo n.º 5
0
def test_hidden_file() -> None:
    """The leading dot of a hidden file name is expected to be dropped."""
    sanitized = secure_filepath(".foo")
    expected = Path("foo")
    assert sanitized == expected
Ejemplo n.º 6
0
def test_DS_STORE() -> None:
    """`._.DS_STORE` is expected to be reduced to `DS_STORE`."""
    sanitized = secure_filepath("._.DS_STORE")
    expected = Path("DS_STORE")
    assert sanitized == expected
Ejemplo n.º 7
0
def test_only_double_dot() -> None:
    """A bare `..` is expected to sanitise to the empty path."""
    sanitized = secure_filepath("..")
    expected = Path("")
    assert sanitized == expected
Ejemplo n.º 8
0
def test_only_root() -> None:
    """A bare `/` is expected to sanitise to the empty path."""
    sanitized = secure_filepath("/")
    expected = Path("")
    assert sanitized == expected
Ejemplo n.º 9
0
def test_contain_ampersand() -> None:
    """Ampersands, and components made only of them, are expected to go."""
    sanitized = secure_filepath("/&&/&foo/bar")
    expected = Path("foo/bar")
    assert sanitized == expected
Ejemplo n.º 10
0
def test_contain_fullsize_ampersand() -> None:
    """A full-width ampersand (U+FF06) is expected to be removed."""
    # Written as an escape so the full-width character cannot be silently
    # replaced by ASCII "&", which would make this test a duplicate of the
    # plain-ampersand case.
    sanitized = secure_filepath("/\uff06foo/bar")
    expected = Path("foo/bar")
    assert sanitized == expected
Ejemplo n.º 11
0
def test_contain_pipe() -> None:
    """Pipes, and components made only of them, are expected to go."""
    sanitized = secure_filepath("/||/|foo/bar")
    expected = Path("foo/bar")
    assert sanitized == expected
Ejemplo n.º 12
0
def test_japanese_filename() -> None:
    """Katakana characters are expected to be dropped from each component."""
    sanitized = secure_filepath("/フーfoo/バーbar")
    expected = Path("foo/bar")
    assert sanitized == expected
Ejemplo n.º 13
0
def test_contain_umlauts() -> None:
    """Umlauts are expected to map to base letters, spaces to `_`."""
    raw_name = u"i contain cool \xfcml\xe4uts.txt"
    expected = Path("i_contain_cool_umlauts.txt")
    sanitized = secure_filepath(raw_name)
    assert sanitized == expected
Ejemplo n.º 14
0
def test_root_dir() -> None:
    """An absolute path is expected to lose its leading `/`."""
    sanitized = secure_filepath("/foo/bar")
    expected = Path("foo/bar")
    assert sanitized == expected
Ejemplo n.º 15
0
def test_prev_dir() -> None:
    """Leading `..` components are expected to be stripped."""
    sanitized = secure_filepath("../../../etc/passwd")
    expected = Path("etc/passwd")
    assert sanitized == expected