Beispiel #1
0
def test_no_neo4j() -> None:
    """Check that asking the connector for an instance raises ServiceUnavailable."""
    # pytest.raises fails the test when the expected exception is not raised
    with pytest.raises(ServiceUnavailable):
        connector.get_instance()

    log.warning("Skipping {} tests: service not available", CONNECTOR)
Beispiel #2
0
 def get(self, test: str) -> Response:
     """Run one of the numbered neo4j checks selected by ``test``.

     "1" runs a valid cypher query, "2" runs a query containing a syntax
     error, "3" saves a JustATest node twice and returns its created and
     modified values; any other value only logs a message.
     Any exception raised while running the selected check is re-raised
     as BadRequest carrying the original message.
     """
     self.neo4j = neo4j.get_instance()
     try:
         if test == "3":
             # Saving a timestamped node twice lets the caller compare the
             # modified value before and after the second save
             node = self.neo4j.JustATest(p_str="")
             node.save()
             payload = {"created": node.created, "modified1": node.modified}
             node.save()
             payload["modified2"] = node.modified
             return self.response(payload)

         if test == "1":
             log.info("First Test")
             self.neo4j.cypher("MATCH (n) RETURN n LIMIT 1")
         elif test == "2":
             log.info("Second Test")
             self.neo4j.cypher("MATCH (n) RETURN n with a syntax error")
         else:
             log.info("No Test")
     except Exception as e:
         raise BadRequest(str(e))
     return self.response({"val": 1})
Beispiel #3
0
    def get(self, uuid: str, user: User) -> Response:
        """Return the file nodes of a dataset, refreshing their status first.

        Access is checked with verifyDatasetAccess and verifyStudyAccess
        (both called with read=True). For each file node:
        - if no file with that name exists in the dataset folder, the status
          is set to "unknown";
        - otherwise, if the status is "unknown", it becomes "uploaded" when
          the size on disk equals the stored size, "importing" when it differs.
        """
        graph = neo4j.get_instance()
        dataset = graph.Dataset.nodes.get_or_none(uuid=uuid)
        self.verifyDatasetAccess(dataset, user=user, read=True)

        parent = dataset.parent_study.single()
        self.verifyStudyAccess(parent, user=user, error_type="Dataset", read=True)

        dataset_dir = self.getPath(user=user, dataset=dataset, read=True)

        files = []
        for entry in dataset.files.all():
            if not dataset_dir.joinpath(entry.name).exists():
                # nothing on disk for this node
                entry.status = "unknown"
                entry.save()
            elif entry.status == "unknown":
                # the file is back on disk: work out the real status from its size
                on_disk = self.getPath(user=user, file=entry, read=True)
                size_matches = on_disk.stat().st_size == entry.size
                entry.status = "uploaded" if size_matches else "importing"
                entry.save()
            files.append(entry)

        return self.response(files)
Beispiel #4
0
    def get(self, query: str, user: User) -> Response:
        """Search HPO nodes by id or label and return at most 50 matches.

        Args:
            query: text to search for; only letters, digits, spaces, colons
                and hyphens are accepted.
            user: the requesting user (not used by the search itself).

        Returns:
            A response wrapping a list of {"hpo_id": ..., "label": ...} dicts,
            ordered by hpo_id descending.

        Raises:
            BadRequest: if the query contains characters outside the whitelist.
        """
        # Chars whitelist: letters, numbers, space, colon and hyphen.
        # fullmatch is used rather than match with a trailing "$": "$" also
        # matches just before a final newline, so "abc\n" used to be accepted.
        if not re.fullmatch(r"[a-zA-Z0-9 :-]+", query):
            raise BadRequest("Invalid HPO query")

        # Queries that look like an HPO id ("HP:" plus at least one char) are
        # matched against hpo_id, everything else against the label
        by_id = query.startswith("HP:") and len(query) >= 4
        field = "hpo_id" if by_id else "label"
        cypher = (
            f"MATCH (hpo:HPO) WHERE hpo.{field} =~ $regexp"
            " RETURN hpo ORDER BY hpo.hpo_id DESC LIMIT 50"
        )

        # case-insensitive "contains" match; the value is passed as a query
        # parameter, not interpolated into the cypher text
        regexp = f"(?i).*{query}.*"

        graph = neo4j.get_instance()
        result = graph.cypher(cypher, regexp=regexp)

        data: List[Dict[str, str]] = []
        for row in result:
            hpo = graph.HPO.inflate(row[0])
            data.append({"hpo_id": hpo.hpo_id, "label": hpo.label})

        return self.response(data)
Beispiel #5
0
    def delete(self, uuid: str, user: User) -> Response:
        """Delete a study, everything attached to it and its folders.

        After the verifyStudyAccess check, the study's datasets (with their
        files), phenotypes and technicals are deleted from the graph, then
        the study itself. The input folder is removed, and the output folder
        too when it exists. A delete event is logged at the end.
        """
        graph = neo4j.get_instance()

        study = graph.Study.nodes.get_or_none(uuid=uuid)
        self.verifyStudyAccess(study, user=user)

        # Resolve both folders while the study node still exists
        study_dir = self.getPath(user=user, study=study)
        results_dir = self.getPath(user=user, study=study, get_output_dir=True)

        for dataset in study.datasets.all():
            for file_node in dataset.files.all():
                file_node.delete()
            dataset.delete()

        for phenotype in study.phenotypes.all():
            phenotype.delete()

        for technical in study.technicals.all():
            technical.delete()

        study.delete()

        # remove the study folders
        shutil.rmtree(study_dir)
        # the output dir is optional
        if results_dir.is_dir():
            shutil.rmtree(results_dir)

        self.log_event(self.events.delete, study)

        return self.empty_response()
Beispiel #6
0
    def get(self, user: User) -> Response:
        """List the studies for which verifyStudyAccess (read=True) is truthy.

        Each entry carries uuid, name, description, datasets, phenotypes and
        technicals. "readonly" is set to False when the user owns the study
        or is connected to one of the owner's groups; it is not set otherwise.
        "owning_group_name" is taken from the owner's groups (the last one
        iterated wins).
        """
        graph = neo4j.get_instance()

        studies = []
        for study in graph.Study.nodes.order_by().all():
            allowed = self.verifyStudyAccess(
                study, user=user, read=True, raiseError=False
            )
            if not allowed:
                continue

            entry = {
                "uuid": study.uuid,
                "name": study.name,
                "description": study.description,
                "datasets": study.datasets,
                "phenotypes": study.phenotypes,
                "technicals": study.technicals,
            }

            owner = study.ownership.single()
            if owner == user:
                entry["readonly"] = False

            for group in owner.belongs_to.all():
                entry["owning_group_name"] = group.fullname
                if group.members.is_connected(user):
                    entry["readonly"] = False

            studies.append(entry)

        return self.response(studies)
Beispiel #7
0
    def delete(self, uuid: str, user: User) -> Response:
        """Delete a dataset, its file nodes and its folders.

        Access is checked on the dataset and on its parent study. The file
        nodes and the dataset node are deleted, then the input folder is
        removed, and the output folder too when it exists. A delete event is
        logged at the end.
        """
        graph = neo4j.get_instance()

        dataset = graph.Dataset.nodes.get_or_none(uuid=uuid)
        self.verifyDatasetAccess(dataset, user=user)

        parent = dataset.parent_study.single()
        self.verifyStudyAccess(parent, user=user, error_type="Dataset")

        # Resolve both folders while the dataset node still exists
        dataset_dir = self.getPath(user=user, dataset=dataset)
        results_dir = self.getPath(user=user, dataset=dataset, get_output_dir=True)

        for file_node in dataset.files.all():
            file_node.delete()

        dataset.delete()

        # remove the dataset folder
        shutil.rmtree(dataset_dir)
        # the output folder is optional
        if results_dir.is_dir():
            shutil.rmtree(results_dir)

        self.log_event(self.events.delete, dataset)

        return self.empty_response()
Beispiel #8
0
    def get(self, uuid: str, user: User) -> Response:
        """List the datasets of a study for which verifyDatasetAccess is truthy.

        Each entry carries uuid, name, description, status, technical,
        phenotype and files. "readonly" is set to False when the user owns
        the dataset or is connected to one of the owner's groups; it is not
        set otherwise.
        """
        graph = neo4j.get_instance()

        study = graph.Study.nodes.get_or_none(uuid=uuid)
        self.verifyStudyAccess(study, user=user, read=True)

        datasets = []
        for dataset in study.datasets.all():
            allowed = self.verifyDatasetAccess(
                dataset, user=user, read=True, raiseError=False
            )
            if not allowed:
                continue

            entry = {
                "uuid": dataset.uuid,
                "name": dataset.name,
                "description": dataset.description,
                "status": dataset.status,
                "technical": dataset.technical,
                "phenotype": dataset.phenotype,
                "files": dataset.files,
            }

            owner = dataset.ownership.single()
            if owner == user:
                entry["readonly"] = False

            for group in owner.belongs_to.all():
                if group.members.is_connected(user):
                    entry["readonly"] = False

            datasets.append(entry)

        return self.response(datasets)
Beispiel #9
0
    def __init__(self) -> None:
        """Load geodata.tsv into neo4j, creating or updating GeoData nodes.

        The first non-empty row of the file gives the attribute names; the
        first column is the key used to look up an existing node. Every
        following row either creates a new GeoData node or updates the
        attributes that differ on the existing one.
        """
        header: Optional[List[str]] = None
        graph = neo4j.get_instance()
        # newline="" is what the csv module asks for, so that newlines inside
        # quoted fields are interpreted by the reader itself
        with open(DATA_PATH.joinpath("geodata.tsv"), newline="") as fd:
            reader = csv.reader(fd, delimiter="\t", quotechar='"')
            for row in reader:
                if not row:
                    # blank line: nothing to import, and row[0] would fail
                    continue

                if not header:
                    # use the first row to get the list of attributes
                    header = row
                    continue

                props = dict(zip(header, row))
                geodata = graph.GeoData.nodes.get_or_none(**{header[0]: row[0]})
                if not geodata:
                    # create a new one
                    graph.GeoData(**props).save()
                    continue

                # update only what changed, and save once per node instead
                # of once per changed attribute
                changed = False
                for key, value in props.items():
                    if getattr(geodata, key) != value:
                        setattr(geodata, key, value)
                        changed = True
                if changed:
                    geodata.save()

        log.info("GeoData nodes succesfully created")
Beispiel #10
0
    def get(self, user: User) -> Response:
        """Return node counters, overall and per group, computed on the graph.

        All values come from the count_nodes and count_by_group helpers.
        """
        graph = neo4j.get_instance()

        stats: Dict[str, Union[int, Dict[str, int]]] = {
            "num_users": count_nodes(graph, "count_users"),
            "num_studies": count_nodes(graph, "count_studies"),
            "num_datasets": count_nodes(graph, "count_datasets"),
            "num_datasets_per_group": count_by_group(graph, "count_datasets"),
            "num_datasets_with_vcf": count_nodes(graph, "count_dataset_with_vcf"),
            "num_datasets_with_vcf_per_group": count_by_group(
                graph, "count_dataset_with_vcf"
            ),
            "num_datasets_with_gvcf": count_nodes(graph, "count_dataset_with_gvcf"),
            "num_datasets_with_gvcf_per_group": count_by_group(
                graph, "count_dataset_with_gvcf"
            ),
            "num_files": count_nodes(graph, "count_files"),
        }

        return self.response(stats)
def verify_phenotype_access(
    endpoint: NIGEndpoint, uuid: str, user: User
) -> Dict[str, Any]:
    """Load a phenotype and its study, checking access on the study.

    Raises NotFound when no phenotype has the given uuid. The study is
    passed to endpoint.verifyStudyAccess before both nodes are returned
    as {"phenotype": ..., "study": ...}.
    """
    found = neo4j.get_instance().Phenotype.nodes.get_or_none(uuid=uuid)
    if found is None:
        raise NotFound(PHENOTYPE_NOT_FOUND)

    parent = found.defined_in.single()
    endpoint.verifyStudyAccess(parent, user=user, error_type="Phenotype")
    return {"phenotype": found, "study": parent}
Beispiel #12
0
    def get(self, uuid: str, user: User) -> Response:
        """Return a single study after a read access check, logging the access."""
        found = neo4j.get_instance().Study.nodes.get_or_none(uuid=uuid)
        self.verifyStudyAccess(found, user=user, read=True)

        self.log_event(self.events.access, found)

        return self.response(found)
Beispiel #13
0
    def put(self, uuid: str, filename: str, user: User) -> Response:
        """Receive an upload chunk and, once complete, validate the file.

        The chunk is handed to self.chunk_upload. While the upload is not
        complete its response is returned as is. When complete, the file
        node named ``filename`` is looked up in the dataset and then:
        - if the size on disk differs from the stored size, node and file
          are removed and ServerError is raised;
        - if validate_gzipped_fastq reports a failure, node and file are
          removed and BadRequest is raised with the reported message;
        - otherwise the node status becomes "uploaded" and a create event
          is logged.

        Raises NotFound when the dataset has no file node with that name.
        """
        graph = neo4j.get_instance()
        # check permission
        dataset = graph.Dataset.nodes.get_or_none(uuid=uuid)
        self.verifyDatasetAccess(dataset, user=user)

        parent = dataset.parent_study.single()
        self.verifyStudyAccess(parent, user=user, error_type="Dataset")

        upload_dir = self.getPath(user=user, dataset=dataset)
        completed, response = self.chunk_upload(Path(upload_dir), filename)
        log.debug("check {}", response)
        if not completed:
            return response

        # get the file: the whole list is scanned, so the last node with a
        # matching name is the one that is kept
        file = None
        for candidate in dataset.files.all():
            if candidate.name == filename:
                file = candidate
        if not file:
            raise NotFound(FILE_NOT_FOUND)

        # check the final size
        filepath = self.getPath(user=user, file=file)
        filesize = filepath.stat().st_size
        if filesize != file.size:
            log.debug(
                "size expected: {},actual size: {}",
                file.size,
                filesize,
            )
            file.delete()
            graph.db.commit()
            filepath.unlink()
            raise ServerError(
                "File has not been uploaded correctly: final size does not "
                "correspond to total size. Please try a new upload",
            )

        # check the content of the file
        file_validation = validate_gzipped_fastq(filepath)
        if not file_validation[0]:
            # invalid content: drop both the node and the file on disk
            file.delete()
            graph.db.commit()
            filepath.unlink()
            raise BadRequest(file_validation[1])

        file.status = "uploaded"
        file.save()
        self.log_event(
            self.events.create,
            file,
            {filename: f"Upload completed in dataset {uuid}"},
        )

        return response
def verify_dataset_access(
    endpoint: NIGEndpoint, uuid: str, user: User
) -> Dict[str, Any]:
    """Load a dataset and its parent study, checking access on both.

    The dataset is passed to endpoint.verifyDatasetAccess and its parent
    study to endpoint.verifyStudyAccess; both nodes are then returned as
    {"dataset": ..., "study": ...}.
    """
    found = neo4j.get_instance().Dataset.nodes.get_or_none(uuid=uuid)
    endpoint.verifyDatasetAccess(found, user=user)

    parent = found.parent_study.single()
    endpoint.verifyStudyAccess(parent, user=user, error_type="Dataset")

    return {"dataset": found, "study": parent}
Beispiel #15
0
    def delete(self, uuid1: str, uuid2: str, user: User) -> Response:
        """Remove the parent/son relationship between two phenotypes.

        Both phenotypes are loaded (NotFound when one is missing) and the
        study of each goes through verifyStudyAccess with read=False.
        The first connection found, in this order, is removed on both sides:
        [1] -father-> [2], [1] -mother-> [2], [1] -son-> [2].
        A modify event is logged on the first phenotype in every case.
        """
        graph = neo4j.get_instance()

        # Same lookup and access check for both ends, first uuid1 then uuid2
        loaded = []
        for node_uuid in (uuid1, uuid2):
            node = graph.Phenotype.nodes.get_or_none(uuid=node_uuid)
            if node is None:
                raise NotFound(PHENOTYPE_NOT_FOUND)

            study = node.defined_in.single()
            self.verifyStudyAccess(
                study, user=user, error_type="Phenotype", read=False
            )
            loaded.append(node)
        phenotype1, phenotype2 = loaded

        # [1] - FATHER -> [2]  _or_  [1] - MOTHER -> [2]
        for role in ("father", "mother"):
            parent_link = getattr(phenotype1, role)
            if parent_link.is_connected(phenotype2):
                parent_link.disconnect(phenotype2)
                # delete son relationship
                phenotype2.son.disconnect(phenotype1)
                break
        else:
            # [1] <- FATHER - [2]  _or_  [1] <- MOTHER - [2]
            if phenotype1.son.is_connected(phenotype2):
                phenotype1.son.disconnect(phenotype2)
                # delete mother or father relationship
                if phenotype2.mother.is_connected(phenotype1):
                    phenotype2.mother.disconnect(phenotype1)
                if phenotype2.father.is_connected(phenotype1):
                    phenotype2.father.disconnect(phenotype1)

        self.log_event(
            self.events.modify,
            phenotype1,
            {"relationship": "removed", "target": phenotype2.uuid},
        )
        return self.empty_response()
Beispiel #16
0
    def put(self, uuid: str, user: User, **kwargs: Any) -> Response:
        """Update the properties of a study with the received keyword values.

        The study goes through verifyStudyAccess, is updated with
        graph.update_properties and saved; a modify event is then logged.
        """
        graph = neo4j.get_instance()

        target = graph.Study.nodes.get_or_none(uuid=uuid)
        self.verifyStudyAccess(target, user=user)

        graph.update_properties(target, kwargs)
        target.save()

        self.log_event(self.events.modify, target, kwargs)

        return self.empty_response()
Beispiel #17
0
    def post(self, uuid1: str, uuid2: str, user: User) -> Response:
        """Register the second phenotype as father or mother of the first.

        The kind of relationship depends on the sex of the second phenotype:
        "male" creates a father relationship, "female" a mother one. In both
        cases the second phenotype also gets a son relationship to the first.

        Args:
            uuid1: uuid of the son phenotype.
            uuid2: uuid of the parent phenotype.
            user: the requesting user, passed to verifyStudyAccess.

        Returns:
            A response with the uuid and name of the parent phenotype.

        Raises:
            BadRequest: if both uuids are the same, or if the sex of the
                parent phenotype is neither "male" nor "female".
            NotFound: if one of the two phenotypes does not exist.
        """
        graph = neo4j.get_instance()

        if uuid1 == uuid2:
            raise BadRequest(
                f"Cannot set relationship between {uuid1} and itself")

        # Same lookup and access check for both ends, first uuid1 then uuid2
        loaded = []
        for node_uuid in (uuid1, uuid2):
            node = graph.Phenotype.nodes.get_or_none(uuid=node_uuid)
            if node is None:
                raise NotFound(PHENOTYPE_NOT_FOUND)

            study = node.defined_in.single()
            self.verifyStudyAccess(
                study, user=user, error_type="Phenotype", read=False
            )
            loaded.append(node)
        phenotype1, phenotype2 = loaded

        # check parent sex before touching the graph: with any other value
        # no relationship can be chosen (this used to end in an
        # UnboundLocalError when logging the event)
        if phenotype2.sex == "male":
            relationship = "father"
        elif phenotype2.sex == "female":
            relationship = "mother"
        else:
            raise BadRequest(
                f"Cannot set relationship: sex of {uuid2} "
                "is neither male nor female")

        phenotype2.son.connect(phenotype1)
        getattr(phenotype1, relationship).connect(phenotype2)

        self.log_event(
            self.events.modify,
            phenotype1,
            {
                "relationship": relationship,
                "target": phenotype2.uuid
            },
        )
        res = {"uuid": phenotype2.uuid, "name": phenotype2.name}
        return self.response(res)
Beispiel #18
0
    def get(self, uuid: str, user: User) -> Response:
        """Return the technicals nodes of a study after a read access check."""
        graph = neo4j.get_instance()

        study = graph.Study.nodes.get_or_none(uuid=uuid)
        self.verifyStudyAccess(study, user=user, read=True)

        # copy the related nodes into a plain list for the response
        technicals = [techmeta for techmeta in study.technicals.all()]

        return self.response(technicals)
Beispiel #19
0
    def get(self) -> Response:
        """Return the overall node counters computed by count_nodes."""
        graph = neo4j.get_instance()

        stats = {
            "num_users": count_nodes(graph, "count_users"),
            "num_studies": count_nodes(graph, "count_studies"),
            "num_datasets": count_nodes(graph, "count_datasets"),
            "num_datasets_with_vcf": count_nodes(graph, "count_dataset_with_vcf"),
            "num_files": count_nodes(graph, "count_files"),
        }

        return self.response(stats)
Beispiel #20
0
    def verify_indexes(label: str, key: str) -> None:
        """Raise ValueError unless an index exists on exactly label.key.

        The rows returned by "CALL db.indexes()" are scanned; positions 7
        and 8 of each row are read as the index labels and properties. Only
        indexes with a single label and a single property are considered.
        """
        graph = neo4j.get_instance()
        for row in graph.cypher("CALL db.indexes()"):
            labels, props = row[7], row[8]

            # composite indexes cannot match a single label.key pair
            if len(labels) != 1 or len(props) != 1:
                continue
            if labels[0] != label or props[0] != key:
                continue

            log.debug("Found an index for {}.{}", label, key)
            return

        raise ValueError(
            f"Can't find an index for {label}.{key}: "
            "add an index or skip this check with ignore_indexes=True")
Beispiel #21
0
    def get(self, uuid: str, user: User) -> Response:
        """Return a single dataset, logging the access.

        Read access is checked on the dataset and on its parent study.
        """
        found = neo4j.get_instance().Dataset.nodes.get_or_none(uuid=uuid)
        self.verifyDatasetAccess(found, user=user, read=True)

        parent = found.parent_study.single()
        self.verifyStudyAccess(parent, user=user, error_type="Dataset", read=True)

        self.log_event(self.events.access, found)
        return self.response(found)
    def delete(self, uuid: str, user: User) -> Response:
        """Delete a phenotype and log the event.

        Raises NotFound when no phenotype has the given uuid; the study the
        phenotype is defined in goes through verifyStudyAccess first.
        """
        target = neo4j.get_instance().Phenotype.nodes.get_or_none(uuid=uuid)
        if target is None:
            raise NotFound(PHENOTYPE_NOT_FOUND)

        parent = target.defined_in.single()
        self.verifyStudyAccess(parent, user=user, error_type="Phenotype")

        target.delete()
        self.log_event(self.events.delete, target)

        return self.empty_response()
Beispiel #23
0
    def delete(self, uuid: str, user: User) -> Response:
        """Delete a technical metadata node and log the event.

        Raises NotFound when no node has the given uuid; the study the node
        is defined in goes through verifyStudyAccess first.
        """
        target = neo4j.get_instance().TechnicalMetadata.nodes.get_or_none(uuid=uuid)
        if target is None:
            raise NotFound(TECHMETA_NOT_FOUND)

        parent = target.defined_in.single()
        self.verifyStudyAccess(parent, user=user, error_type="Technical Metadata")

        target.delete()
        self.log_event(self.events.delete, target)

        return self.empty_response()
Beispiel #24
0
    def get(self, uuid: str, user: User) -> Response:
        """Return a technical metadata node, logging the access.

        Raises NotFound when the lookup gives no node; read access is
        checked on the study the node is defined in.
        """
        found = neo4j.get_instance().TechnicalMetadata.nodes.get_or_none(uuid=uuid)
        if not found:
            raise NotFound(TECHMETA_NOT_FOUND)

        parent = found.defined_in.single()
        self.verifyStudyAccess(
            parent, user=user, error_type="Technical Metadata", read=True
        )

        self.log_event(self.events.access, found)

        return self.response(found)
Beispiel #25
0
    def post(
        self,
        uuid: str,
        name: str,
        description: str,
        # should be an instance of neo4j.Study,
        # but typing is still not working with neomodel
        study: Any,
        user: User,
        phenotype: Optional[str] = None,
        technical: Optional[str] = None,
    ) -> Response:
        """Create a dataset in a study, together with its folder.

        Args:
            uuid: not used in this method.
            name: name of the new dataset.
            description: description of the new dataset.
            study: the study node the dataset is connected to.
            user: the user that becomes owner of the dataset.
            phenotype: optional uuid of a phenotype of the study to connect.
            technical: optional uuid of a technical metadata node of the
                study to connect.

        Returns:
            A response wrapping the uuid of the new dataset.

        Raises:
            NotFound: if the phenotype or technical uuid is not found among
                those of the study.
            Conflict: if the dataset folder already exists.
        """
        graph = neo4j.get_instance()

        # Resolve the optional links BEFORE creating anything: a wrong uuid
        # now fails without leaving an orphan dataset node behind
        phenotype_node = None
        if phenotype:
            phenotype_node = study.phenotypes.get_or_none(uuid=phenotype)
            if phenotype_node is None:  # pragma: no cover
                raise NotFound(PHENOTYPE_NOT_FOUND)

        technical_node = None
        if technical:
            technical_node = study.technicals.get_or_none(uuid=technical)
            if technical_node is None:  # pragma: no cover
                raise NotFound(TECHMETA_NOT_FOUND)

        properties = {"name": name, "description": description}
        dataset = graph.Dataset(**properties).save()

        dataset.ownership.connect(user)
        dataset.parent_study.connect(study)
        if phenotype_node is not None:
            dataset.phenotype.connect(phenotype_node)
        if technical_node is not None:
            dataset.technical.connect(technical_node)

        path = self.getPath(user=user, dataset=dataset)

        try:
            path.mkdir(parents=True, exist_ok=False)
        # Almost impossible that the same uuid was already used for another dataset
        except FileExistsError as exc:  # pragma: no cover
            dataset.delete()
            raise Conflict(str(exc)) from exc

        # The logged payload holds the received uuids, not the nodes
        event_payload = dict(properties)
        if phenotype:
            event_payload["phenotype"] = phenotype
        if technical:
            event_payload["technical"] = technical
        self.log_event(self.events.create, dataset, event_payload)

        return self.response(dataset.uuid)
def getInputSchema(request: FlaskRequest, is_post: bool) -> Type[Schema]:
    """Build the "PhenotypeDefinition" schema.

    The fields are name, age, sex, hpo and birth_place. The accepted values
    of birth_place are the uuids of the GeoData nodes, labelled with their
    province; when exactly one GeoData node exists its uuid is used as the
    dump default. Neither ``request`` nor ``is_post`` is used here.
    """
    graph = neo4j.get_instance()

    # one pass over the nodes, then split into parallel key/label lists
    geodata = [(g.uuid, g.province) for g in graph.GeoData.nodes.all()]
    geodata_keys = [key for key, _ in geodata]
    geodata_labels = [label for _, label in geodata]

    default_geodata = geodata_keys[0] if len(geodata_keys) == 1 else None

    # as defined in Marshmallow.schema.from_dict
    attributes: Dict[str, Union[fields.Field, type]] = {
        "name": fields.Str(required=True),
        "age": fields.Integer(allow_none=True, validate=validate.Range(min=0)),
        "sex": fields.Str(
            required=True,
            validate=validate.OneOf(SEX),
            metadata={"description": ""},
        ),
        "hpo": fields.List(
            fields.Str(),
            metadata={
                "label": "HPO",
                "autocomplete_endpoint": "/api/hpo",
                "autocomplete_show_id": True,
                "autocomplete_id_bind": "hpo_id",
                "autocomplete_label_bind": "label",
            },
        ),
        "birth_place": fields.Str(
            required=False,
            allow_none=True,
            metadata={
                "label": "Birth Place",
                "description": "",
            },
            dump_default=default_geodata,
            validate=validate.OneOf(choices=geodata_keys, labels=geodata_labels),
        ),
    }

    return Schema.from_dict(attributes, name="PhenotypeDefinition")
Beispiel #27
0
    def post(self, uuid: str, name: str, user: User,
             **kwargs: Any) -> Response:
        """Register a new file in a dataset and initialise its chunked upload.

        Args:
            uuid: uuid of the dataset that receives the file.
            name: file name; must be SampleName_R1.fastq.gz or
                SampleName_R2.fastq.gz, where SampleName is made of letters,
                digits, underscores and hyphens.
            user: the requesting user, passed to the access checks.
            **kwargs: must contain "size", stored on the file node.

        Returns:
            The response of self.init_chunk_upload.

        Raises:
            BadRequest: if the file name does not follow the convention.
        """
        # check permissions
        graph = neo4j.get_instance()
        dataset = graph.Dataset.nodes.get_or_none(uuid=uuid)
        self.verifyDatasetAccess(dataset, user=user)

        study = dataset.parent_study.single()
        self.verifyStudyAccess(study, user=user, error_type="Dataset")

        path = self.getPath(user=user, dataset=dataset)

        # check if the filename is correct.
        # The dots are escaped and the WHOLE name has to match: with
        # re.match and bare dots, names like "a_R1.fastq.gz.sh" or
        # "a_R1xfastqxgz" were accepted
        name_pattern = r"[a-zA-Z0-9_-]+_R[12]\.fastq\.gz"
        if not re.fullmatch(name_pattern, name):
            raise BadRequest(
                "Filename does not follow the correct naming convention: "
                "SampleName_R1/R2.fastq.gz")

        # set the allowed file format
        self.set_allowed_exts(["gz"])

        properties = {
            "name": name,
            "size": kwargs["size"],
            # Currently fixed
            "type": "fastq.gz",
            "status": "importing",
        }

        file = graph.File(**properties).save()

        file.dataset.connect(dataset)

        self.log_event(
            self.events.create,
            file,
            {
                "operation":
                f"Accepted upload for {name} file in {uuid} dataset"
            },
        )

        return self.init_chunk_upload(Path(path), name, force=False)
Beispiel #28
0
    def post(self, uuid: str, user: User, **kwargs: Any) -> Response:
        """Create a technical metadata node inside a study.

        The study goes through verifyStudyAccess; the node is built from the
        received keyword values, connected to the study, and a create event
        is logged. The response wraps the uuid of the new node.
        """
        graph = neo4j.get_instance()

        parent = graph.Study.nodes.get_or_none(uuid=uuid)
        self.verifyStudyAccess(parent, user=user)

        created = graph.TechnicalMetadata(**kwargs).save()
        created.defined_in.connect(parent)

        self.log_event(self.events.create, created, kwargs)

        return self.response(created.uuid)
Beispiel #29
0
    def post(self, user: User, **kwargs: Any) -> Response:
        """Create a study owned by the user, together with its folder.

        The node is built from the received keyword values. If the folder
        already exists the node is deleted again and Conflict is raised.
        The response wraps the uuid of the new study.
        """
        graph = neo4j.get_instance()

        created = graph.Study(**kwargs).save()
        created.ownership.connect(user)

        study_dir = self.getPath(user=user, study=created)

        try:
            study_dir.mkdir(parents=True, exist_ok=False)
        except FileExistsError as exc:  # pragma: no cover
            # Very unlikely: it would mean the same uuid was already used
            # by another study. Roll back the node that was just created
            created.delete()
            raise Conflict(str(exc))

        self.log_event(self.events.create, created, kwargs)

        return self.response(created.uuid)
Beispiel #30
0
    def put(self, uuid: str, user: User, **kwargs: Any) -> Response:
        """Update a technical metadata node with the received keyword values.

        Raises NotFound when no node has the given uuid; the study the node
        is defined in goes through verifyStudyAccess. The node is updated
        with graph.update_properties and saved, then a modify event is logged.
        """
        graph = neo4j.get_instance()

        target = graph.TechnicalMetadata.nodes.get_or_none(uuid=uuid)
        if target is None:
            raise NotFound(TECHMETA_NOT_FOUND)

        parent = target.defined_in.single()
        self.verifyStudyAccess(parent, user=user, error_type="Technical Metadata")

        graph.update_properties(target, kwargs)
        target.save()

        self.log_event(self.events.modify, target, kwargs)

        return self.empty_response()