Example #1
    def __init__(self) -> None:
        # load GeoData into neo4j
        attributes: Optional[List[str]] = None
        graph = neo4j.get_instance()
        with open(DATA_PATH.joinpath("geodata.tsv")) as fd:
            rd = csv.reader(fd, delimiter="\t", quotechar='"')
            for row in rd:
                if not attributes:
                    # use the first row to get the list of attributes
                    attributes = row
                else:
                    props = dict(zip(attributes, row))
                    geodata = graph.GeoData.nodes.get_or_none(
                        **{attributes[0]: row[0]})
                    if not geodata:
                        # create a new one
                        geodata = graph.GeoData(**props).save()
                    else:
                        # check if an update is needed
                        for key, value in props.items():
                            if getattr(geodata, key) != value:
                                setattr(geodata, key, value)
                                geodata.save()

        log.info("GeoData nodes succesfully created")
Example #2
        def put(self, force: bool = False) -> Response:

            # This is just to test the allowed extensions without adding a new parameter.
            if not force:
                self.set_allowed_exts(["txt"])
            response = self.upload(
                subfolder=DATA_PATH.joinpath("fixsubfolder"), force=force)
            return response
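
The endpoint above is exercised with Flask's test client in Example #7; outside the test suite, an equivalent multipart PUT could be sketched with the requests library (the URL is illustrative, while the "file" and "force" fields mirror the ones used in the tests):

import requests

with open("notes.txt", "rb") as fh:
    r = requests.put(
        "https://example.org/api/tests/upload",  # illustrative URL
        files={"file": ("notes.txt", fh)},        # multipart file field
        data={"force": False},                    # txt-only restriction stays active
    )
    print(r.status_code)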
Example #3
        def put(self, filename: str, force: bool = False) -> Response:

            path = DATA_PATH.joinpath("fixed")
            completed, response = self.chunk_upload(path, filename)

            if completed:
                log.info("Upload completed")

            return response
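
The endpoint above consumes one Content-Range chunk per PUT. A minimal client-side sketch of that protocol, assuming the requests library and an already initialized upload endpoint (upload_endpoint and chunk_size are illustrative, and the range convention mirrors the one used in the tests of Example #8):

from typing import Optional
import requests

def upload_in_chunks(
    upload_endpoint: str, data: bytes, chunk_size: int = 1024
) -> Optional[requests.Response]:
    total = len(data)
    response: Optional[requests.Response] = None
    for start in range(0, total, chunk_size):
        chunk = data[start:start + chunk_size]
        end = start + len(chunk)
        # e.g. "bytes 0-5/30" for a first 5-byte chunk, as in the tests
        response = requests.put(
            upload_endpoint,
            data=chunk,
            headers={"Content-Range": f"bytes {start}-{end}/{total}"},
        )
    return response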
Example #4
        def get(self,
                folder: str,
                fname: str,
                stream: bool = False) -> Response:
            # The same as defined in test_upload
            subfolder = DATA_PATH.joinpath(folder)

            if stream:
                return Downloader.send_file_streamed(fname,
                                                     subfolder=subfolder)

            return Downloader.send_file_content(fname, subfolder=subfolder)
Example #5
        def post(
            self,
            name: str,
            mimeType: str,
            size: int,
            lastModified: int,
            force: bool = False,
        ) -> Response:

            # This is just to test the allowed extensions without adding a new parameter.
            if not force:
                self.set_allowed_exts(["txt"])

            path = DATA_PATH.joinpath("fixed")
            return self.init_chunk_upload(path, name, force=force)
Example #6
def launch_pipeline(
    self: Task,
    dataset_list: List[str],
    snakefile: str = "Single_Sample.smk",
    force: bool = False,
) -> None:
    task_id = self.request.id
    log.info("Start task [{}:{}]", task_id, self.name)
    # create a unique workdir for every celery task / snakemake launch
    wrkdir = DATA_PATH.joinpath("jobs", task_id)
    wrkdir.mkdir(parents=True, exist_ok=True)
    # copy the files used by snakemake to the work dir
    source_dir = Path("/snakemake")
    for snk_file in source_dir.glob("*"):
        if snk_file.is_file():
            shutil.copy(snk_file, wrkdir)

    # get the file list from the dataset list
    file_list = []
    graph = neo4j.get_instance()
    for d in dataset_list:
        # get the path of the dataset directory
        dataset = graph.Dataset.nodes.get_or_none(uuid=d)
        owner = dataset.ownership.single()
        group = owner.belongs_to.single()
        study = dataset.parent_study.single()
        datasetDirectory = INPUT_ROOT.joinpath(group.uuid, study.uuid,
                                               dataset.uuid)
        # check if the directory exists
        if not datasetDirectory.exists():
            # should an error be raised here?
            log.warning("Folder for dataset {} not found", d)
            continue
        # append the contained files to the file list
        for f in datasetDirectory.iterdir():
            file_list.append(f)
        # mark the dataset as running
        dataset.status = "RUNNING"
        dataset.save()

    # collect the fastq entries, to be written as a csv file: fastq.csv
    fastq = []

    # the pattern is also checked in the file upload endpoint; this is an additional check
    pattern = r"([a-zA-Z0-9_-]+)_(R[12])\.fastq\.gz"
    for filepath in file_list:
        fname = filepath.name
        if match := re.match(pattern, fname):
            file_label = match.group(1)
            fragment = match.group(2)

            # get the input path
            input_path = filepath.parent
            # create the output path
            output_path = OUTPUT_ROOT.joinpath(
                input_path.relative_to(INPUT_ROOT))
            output_path.mkdir(parents=True, exist_ok=True)
            if not output_path.joinpath(fname).exists():
                output_path.joinpath(fname).symlink_to(filepath)

            # create row for csv
            fastq_row = [file_label, fragment, input_path, output_path]
            fastq.append(fastq_row)
        else:
            log.info(
                "fastq {} does not follow the expected naming convention: "
                "SampleName_R1/R2.fastq.gz",
                filepath,
            )
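
A quick standalone check of the naming convention enforced by the pattern above (the sample file names are made up for illustration):

import re

pattern = r"([a-zA-Z0-9_-]+)_(R[12])\.fastq\.gz"
for fname in ("SampleA_R1.fastq.gz", "SampleA_R2.fastq.gz", "badname.fastq.gz"):
    if match := re.match(pattern, fname):
        # group(1) is the sample label, group(2) the fragment (R1 or R2)
        print(fname, "->", match.group(1), match.group(2))
    else:
        print(fname, "-> rejected")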
Example #7
    def test_simple_upload_and_download(self, client: FlaskClient,
                                        faker: Faker) -> None:

        warnings.filterwarnings(
            "ignore", message="unclosed file <_io.BufferedReader name=")

        self.fcontent = faker.paragraph()
        self.save("fcontent", self.fcontent)
        # as defined in test_upload.py for normal uploads
        upload_folder = "fixsubfolder"

        self.fname = f"{faker.pystr()}.notallowed"

        r = client.put(
            f"{API_URI}/tests/upload",
            data={
                "file": (io.BytesIO(str.encode(self.fcontent)), self.fname),
                # With force set to False, only txt files are allowed for upload.
                # Odd, but this is how the endpoint is configured to exercise the tests
                "force": False,
            },
        )
        assert r.status_code == 400
        assert self.get_content(r) == "File extension not allowed"

        self.fname = f"{faker.pystr()}.txt"
        self.save("fname", self.fname)

        r = client.put(
            f"{API_URI}/tests/upload",
            data={
                "file": (io.BytesIO(str.encode(self.fcontent)), self.fname),
                # With force set to False, only txt files are allowed for upload.
                # Odd, but this is how the endpoint is configured to exercise the tests
                "force": False,
            },
        )
        assert r.status_code == 200

        destination_path = DATA_PATH.joinpath(upload_folder, self.fname)
        assert destination_path.exists()
        assert oct(os.stat(destination_path).st_mode & 0o777) == "0o440"

        r = client.put(
            f"{API_URI}/tests/upload",
            data={"file": (io.BytesIO(str.encode(self.fcontent)), self.fname)},
        )
        assert r.status_code == 409
        err = f"File '{self.fname}' already exists, use force parameter to overwrite"
        assert self.get_content(r) == err

        r = client.put(
            f"{API_URI}/tests/upload",
            data={
                "file": (io.BytesIO(str.encode(self.fcontent)), self.fname),
                "force": True,
            },
        )
        assert r.status_code == 200

        destination_path = DATA_PATH.joinpath(upload_folder, self.fname)
        assert destination_path.exists()
        assert oct(os.stat(destination_path).st_mode & 0o777) == "0o440"

        c = self.get_content(r)
        assert isinstance(c, dict)
        assert c.get("filename") == self.fname
        meta = c.get("meta")
        assert meta is not None
        assert meta.get("charset") is not None
        assert meta.get("type") is not None

        self.fname = self.get("fname")
        self.fcontent = self.get("fcontent")
        # as defined in test_upload.py for normal uploads
        upload_folder = "fixsubfolder"

        r = client.get(f"{API_URI}/tests/download/folder/doesnotexist")
        assert r.status_code == 404
        assert self.get_content(r) == "The requested file does not exist"

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/{self.fname}")
        assert r.status_code == 200
        content = r.data.decode("utf-8")
        assert content == self.fcontent

        new_content = "new content"
        r = client.put(
            f"{API_URI}/tests/upload",
            data={
                "file": (io.BytesIO(str.encode(new_content)), self.fname),
                "force": True,
            },
        )
        assert r.status_code == 200

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/{self.fname}")
        assert r.status_code == 200
        content = r.data.decode("utf-8")
        assert content != self.fcontent
        assert content == new_content

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/{self.fname}",
            query_string={"stream": True},
        )
        assert r.status_code == 200
        content = r.data.decode("utf-8")
        assert content == new_content

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/doesnotexist",
            query_string={"stream": True},
        )
        assert r.status_code == 404
Example #8
    def test_chunked_upload_and_download(self, client: FlaskClient,
                                         faker: Faker) -> None:

        warnings.filterwarnings(
            "ignore", message="unclosed file <_io.BufferedReader name=")

        self.fname = self.get("fname")
        self.fcontent = self.get("fcontent")

        # as defined in test_upload.py for chunked uploads
        upload_folder = "fixed"

        r = client.post(f"{API_URI}/tests/chunkedupload", data={"force": True})
        assert r.status_code == 400

        filename = "fixed.filename.txt"
        data = {
            "force": True,
            "name": filename,
            "size": "999",
            "mimeType": "application/zip",
            "lastModified": 1590302749209,
        }
        r = client.post(f"{API_URI}/tests/chunkedupload", data=data)
        assert r.status_code == 201
        assert self.get_content(r) == ""
        upload_endpoint = get_location_header(
            r.headers, expected=f"{API_URI}/tests/chunkedupload/{filename}")

        data["force"] = False
        r = client.post(f"{API_URI}/tests/chunkedupload", data=data)
        assert r.status_code == 409
        assert self.get_content(r) == f"File '{filename}' already exists"

        with io.StringIO(faker.text()) as f:
            r = client.put(upload_endpoint, data=f)
        assert r.status_code == 400
        assert self.get_content(r) == "Invalid request"

        with io.StringIO(faker.text()) as f:
            r = client.put(
                upload_endpoint,
                data=f,
                headers={"Content-Range": "!"},
            )
        assert r.status_code == 400
        assert self.get_content(r) == "Invalid request"

        up_data = faker.pystr(min_chars=24, max_chars=48).lower()
        STR_LEN = len(up_data)
        with io.StringIO(up_data[0:5]) as f:
            r = client.put(
                upload_endpoint,
                data=f,
                headers={"Content-Range": f"bytes 0-5/{STR_LEN}"},
            )
        assert r.status_code == 206
        assert self.get_content(r) == "partial"

        destination_path = DATA_PATH.joinpath(upload_folder, filename)
        assert destination_path.exists()
        # The file is still writeable because the upload is in progress
        assert oct(os.stat(destination_path).st_mode & 0o777) != "0o440"

        with io.StringIO(up_data[5:]) as f:
            r = client.put(
                upload_endpoint,
                data=f,
                headers={"Content-Range": f"bytes 5-{STR_LEN}/{STR_LEN}"},
            )
        assert r.status_code == 200
        c = self.get_content(r)
        assert isinstance(c, dict)
        assert c.get("filename") is not None
        uploaded_filename = c.get("filename")
        meta = c.get("meta")
        assert meta is not None
        assert meta.get("charset") == "us-ascii"
        assert meta.get("type") == "text/plain"

        destination_path = DATA_PATH.joinpath(upload_folder, filename)
        assert destination_path.exists()
        assert oct(os.stat(destination_path).st_mode & 0o777) == "0o440"

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}")
        assert r.status_code == 200
        content = r.data.decode("utf-8")
        assert content == up_data

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}")
        assert r.status_code == 200
        content = r.data.decode("utf-8")
        assert content == up_data

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
            headers={"Range": ""},
        )
        assert r.status_code == 416

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
            headers={"Range": f"0-{STR_LEN - 1}"},
        )
        assert r.status_code == 416

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
            headers={"Range": "bytes=0-9999999999999999"},
        )
        assert r.status_code == 206

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
            headers={"Range": "bytes=0-4"},
        )
        assert r.status_code == 206
        content = r.data.decode("utf-8")
        assert content == up_data[0:5]

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
            headers={"Range": f"bytes=5-{STR_LEN - 1}"},
        )
        assert r.status_code == 206
        content = r.data.decode("utf-8")
        assert content == up_data[5:]

        r = client.get(
            f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
            headers={"Range": f"bytes=0-{STR_LEN - 1}"},
        )
        assert r.status_code == 206
        content = r.data.decode("utf-8")
        assert content == up_data

        # Send a new string as the file content; it will be added as a prefix
        up_data2 = faker.pystr(min_chars=24, max_chars=48)
        STR_LEN = len(up_data2)
        with io.StringIO(up_data2) as f:
            r = client.put(
                upload_endpoint,
                data=f,
                headers={"Content-Range": f"bytes */{STR_LEN}"},
            )
        assert r.status_code == 503
        error = "Permission denied: failed to write the file"
        assert self.get_content(r) == error

        # force the file to be writeable again
        destination_path = DATA_PATH.joinpath(upload_folder, filename)
        # -rw-rw----
        destination_path.chmod(0o660)

        with io.StringIO(up_data2) as f:
            r = client.put(
                upload_endpoint,
                data=f,
                headers={"Content-Range": f"bytes */{STR_LEN}"},
            )

        assert r.status_code == 200

        destination_path = DATA_PATH.joinpath(upload_folder, filename)
        assert destination_path.exists()
        # File permissions are restored
        assert oct(os.stat(destination_path).st_mode & 0o777) == "0o440"

        # c = self.get_content(r)
        # assert c.get('filename') is not None
        # uploaded_filename = c.get('filename')
        # meta = c.get('meta')
        # assert meta is not None
        # assert meta.get('charset') == 'us-ascii'
        # assert meta.get('type') == 'text/plain'

        # r = client.get(
        #     f'{API_URI}/tests/download/{upload_folder}/{uploaded_filename}'
        # )
        # assert r.status_code == 200
        # content = r.data.decode('utf-8')
        # # Uhmmm... should not be up_data2 + up_data ??
        # assert content == up_data + up_data2

        data["force"] = False
        r = client.post(f"{API_URI}/tests/chunkedupload", data=data)
        assert r.status_code == 409
        err = f"File '{uploaded_filename}' already exists"
        assert self.get_content(r) == err

        data["force"] = True
        r = client.post(f"{API_URI}/tests/chunkedupload", data=data)
        assert r.status_code == 201
        assert self.get_content(r) == ""
        upload_endpoint = get_location_header(
            r.headers, expected=f"{API_URI}/tests/chunkedupload/{filename}")

        data["name"] = "fixed.filename.notallowed"
        data["force"] = False
        r = client.post(f"{API_URI}/tests/chunkedupload", data=data)
        assert r.status_code == 400
        assert self.get_content(r) == "File extension not allowed"

        # Send an upload to a file endpoint that was not previously initialized
        filename = f"{faker.pystr()}.txt"
        with io.StringIO(up_data2) as f:
            r = client.put(
                f"{API_URI}/tests/chunkedupload/{filename}",
                data=f,
                headers={"Content-Range": f"bytes */{STR_LEN}"},
            )

        assert r.status_code == 503
        error = "Permission denied: the destination file does not exist"
        assert self.get_content(r) == error

        destination_path = DATA_PATH.joinpath(upload_folder, filename)
        assert not destination_path.exists()
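
A small sketch of how the inclusive HTTP Range header used above ("bytes=start-end") maps onto a Python slice, consistent with the assertions in this test; parse_range is an illustrative helper, not part of the framework:

def parse_range(header: str, total: int) -> slice:
    # header looks like "bytes=0-4"; both bounds are inclusive
    unit, _, spec = header.partition("=")
    start_s, _, end_s = spec.partition("-")
    if unit != "bytes" or not start_s.isdigit() or not end_s.isdigit():
        # malformed ranges are answered with 416 in the tests above
        raise ValueError("Unsatisfiable range")
    start = int(start_s)
    end = min(int(end_s), total - 1)  # clamp, as with bytes=0-9999999999999999
    return slice(start, end + 1)

# parse_range("bytes=0-4", 30) == slice(0, 5), i.e. up_data[0:5]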
Example #9
from pathlib import Path
from typing import Any, List, Optional

from restapi.config import DATA_PATH
from restapi.exceptions import BadRequest, NotFound
from restapi.rest.definition import EndpointResource
from restapi.services.authentication import User
from restapi.utilities.logs import log

INPUT_ROOT = DATA_PATH.joinpath("input")
OUTPUT_ROOT = DATA_PATH.joinpath("output")

STUDY_NOT_FOUND = "This study cannot be found or you are not authorized to access"
DATASET_NOT_FOUND = "This dataset cannot be found or you are not authorized to access"
PHENOTYPE_NOT_FOUND = (
    "This phenotype cannot be found or you are not authorized to access")
TECHMETA_NOT_FOUND = (
    "This set of technical metadata cannot be found or you are not authorized to access"
)
FILE_NOT_FOUND = "This file cannot be found or you are not authorized to access"

# These should be the model classes, but they cannot be imported here
Study = Any
Dataset = Any
File = Any


class NIGEndpoint(EndpointResource):
    # groups used for tests or, in general, groups we don't want counted in the stats
    GROUPS_TO_FILTER: List[str] = []
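
A hypothetical helper, sketched here only to show how the *_NOT_FOUND messages and the NotFound exception combine with the get_or_none pattern seen in Examples #1 and #6; the neo4j import path, the graph.Study node class, and the helper name are assumptions, not part of this module:

from restapi.connectors import neo4j  # assumed import path for the graph connector

def get_study_or_404(uuid: str) -> Study:
    graph = neo4j.get_instance()
    study = graph.Study.nodes.get_or_none(uuid=uuid)
    if study is None:
        # the message deliberately does not distinguish missing from unauthorized
        raise NotFound(STUDY_NOT_FOUND)
    return study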