def test_no_neo4j() -> None:
    """Verify that requesting the connector without a running neo4j fails.

    The connector is expected to raise ServiceUnavailable; the remaining
    tests of this module are then skipped with a warning.
    """
    with pytest.raises(ServiceUnavailable):
        connector.get_instance()

    log.warning("Skipping {} tests: service not available", CONNECTOR)
def get(self, test: str) -> Response:
    """Run one of the neo4j diagnostic tests selected by ``test``.

    "1" runs a trivial query, "2" runs a query with an intentional syntax
    error, "3" verifies that saving a timestamped node updates its
    ``modified`` attribute; any other value is a no-op.

    Raises:
        BadRequest: when the selected test raises any exception.
    """
    self.neo4j = neo4j.get_instance()
    try:
        if test == "1":
            log.info("First Test")
            self.neo4j.cypher("MATCH (n) RETURN n LIMIT 1")
        elif test == "2":
            log.info("Second Test")
            self.neo4j.cypher("MATCH (n) RETURN n with a syntax error")
        # This test will verify that a timestamped node when saved
        # Automatically update the modified attribute
        elif test == "3":
            data = {}
            n = self.neo4j.JustATest(p_str="")
            n.save()
            data["created"] = n.created
            data["modified1"] = n.modified
            n.save()
            data["modified2"] = n.modified
            return self.response(data)
        else:
            log.info("No Test")
    except Exception as e:
        # chain the original exception so the real cause is not lost
        raise BadRequest(str(e)) from e
    return self.response({"val": 1})
def get(self, uuid: str, user: User) -> Response:
    """List the files of a dataset, refreshing each file's status from disk."""
    graph = neo4j.get_instance()

    dataset = graph.Dataset.nodes.get_or_none(uuid=uuid)
    self.verifyDatasetAccess(dataset, user=user, read=True)
    study = dataset.parent_study.single()
    self.verifyStudyAccess(study, user=user, error_type="Dataset", read=True)
    path = self.getPath(user=user, dataset=dataset, read=True)

    data = []
    for file in dataset.files.all():
        if not path.joinpath(file.name).exists():
            # the file disappeared from disk
            file.status = "unknown"
            file.save()
        elif file.status == "unknown":
            # the file is back on disk: recompute its real status
            filepath = self.getPath(user=user, file=file, read=True)
            if filepath.stat().st_size != file.size:
                file.status = "importing"
            else:
                file.status = "uploaded"
            file.save()
        data.append(file)

    return self.response(data)
def get(self, query: str, user: User) -> Response:
    """Search HPO terms, matching on hpo_id for HP:-prefixed queries, else on label."""
    # Chars whitelist: letters, numbers, space, colon and hyphen
    if not re.match("^[a-zA-Z0-9 :-]+$", query):
        raise BadRequest("Invalid HPO query")

    regexp = f"(?i).*{query}.*"
    if query.startswith("HP:") and len(query) >= 4:
        where_clause = " WHERE hpo.hpo_id =~ $regexp"
    else:
        where_clause = " WHERE hpo.label =~ $regexp"
    cypher = (
        "MATCH (hpo:HPO)"
        + where_clause
        + " RETURN hpo ORDER BY hpo.hpo_id DESC"
        + " LIMIT 50"
    )

    graph = neo4j.get_instance()
    result = graph.cypher(cypher, regexp=regexp)

    data: List[Dict[str, str]] = [
        {"hpo_id": hpo.hpo_id, "label": hpo.label}
        for hpo in (graph.HPO.inflate(row[0]) for row in result)
    ]
    return self.response(data)
def delete(self, uuid: str, user: User) -> Response:
    """Delete a study with all its child nodes and its folders on disk."""
    graph = neo4j.get_instance()

    study = graph.Study.nodes.get_or_none(uuid=uuid)
    self.verifyStudyAccess(study, user=user)

    input_path = self.getPath(user=user, study=study)
    output_path = self.getPath(user=user, study=study, get_output_dir=True)

    # delete the graph nodes first: files, datasets, phenotypes, technicals
    for dataset in study.datasets.all():
        for file in dataset.files.all():
            file.delete()
        dataset.delete()
    for phenotype in study.phenotypes.all():
        phenotype.delete()
    for technical in study.technicals.all():
        technical.delete()
    study.delete()

    # remove the study folders
    shutil.rmtree(input_path)
    # if there is an output dir, delete it
    if output_path.is_dir():
        shutil.rmtree(output_path)

    self.log_event(self.events.delete, study)

    return self.empty_response()
def get(self, user: User) -> Response:
    """List every study the user can read, with ownership metadata.

    ``readonly`` is set to False when the user owns the study or belongs
    to the owner's group; otherwise the key is left unset.
    """
    graph = neo4j.get_instance()

    data = []
    for study in graph.Study.nodes.order_by().all():
        if not self.verifyStudyAccess(
            study, user=user, read=True, raiseError=False
        ):
            continue

        study_el = {
            "uuid": study.uuid,
            "name": study.name,
            "description": study.description,
            "datasets": study.datasets,
            "phenotypes": study.phenotypes,
            "technicals": study.technicals,
        }

        owner = study.ownership.single()
        if owner == user:
            study_el["readonly"] = False
        for group in owner.belongs_to.all():
            study_el["owning_group_name"] = group.fullname
            if group.members.is_connected(user):
                study_el["readonly"] = False

        data.append(study_el)

    return self.response(data)
def delete(self, uuid: str, user: User) -> Response:
    """Delete a dataset, its file nodes, and its folders on disk."""
    graph = neo4j.get_instance()

    # INIT #
    dataset = graph.Dataset.nodes.get_or_none(uuid=uuid)
    self.verifyDatasetAccess(dataset, user=user)
    study = dataset.parent_study.single()
    self.verifyStudyAccess(study, user=user, error_type="Dataset")

    input_path = self.getPath(user=user, dataset=dataset)
    output_path = self.getPath(user=user, dataset=dataset, get_output_dir=True)

    for file in dataset.files.all():
        file.delete()
    dataset.delete()

    # remove the dataset folder
    shutil.rmtree(input_path)
    # if it's present remove the dataset folder
    if output_path.is_dir():
        shutil.rmtree(output_path)

    self.log_event(self.events.delete, dataset)

    return self.empty_response()
def get(self, uuid: str, user: User) -> Response:
    """List the datasets of a study the user can read, with ownership info."""
    graph = neo4j.get_instance()

    study = graph.Study.nodes.get_or_none(uuid=uuid)
    self.verifyStudyAccess(study, user=user, read=True)

    data = []
    for dataset in study.datasets.all():
        if not self.verifyDatasetAccess(
            dataset, user=user, read=True, raiseError=False
        ):
            continue

        dataset_el = {
            "uuid": dataset.uuid,
            "name": dataset.name,
            "description": dataset.description,
            "status": dataset.status,
            "technical": dataset.technical,
            "phenotype": dataset.phenotype,
            "files": dataset.files,
        }

        owner = dataset.ownership.single()
        if owner == user:
            dataset_el["readonly"] = False
        for group in owner.belongs_to.all():
            if group.members.is_connected(user):
                dataset_el["readonly"] = False

        data.append(dataset_el)

    return self.response(data)
def __init__(self) -> None:
    """Load GeoData nodes into neo4j from the bundled geodata.tsv file.

    The first TSV row provides the attribute names; each following row
    either creates a new GeoData node or, when a node keyed on the first
    attribute already exists, updates any property whose value changed.
    """
    # enter GeoData in neo4j
    attributes: Optional[List[str]] = None
    graph = neo4j.get_instance()
    # newline="" is required by the csv module to correctly handle
    # quoted fields that may contain newlines
    with open(DATA_PATH.joinpath("geodata.tsv"), newline="") as fd:
        rd = csv.reader(fd, delimiter="\t", quotechar='"')
        for row in rd:
            if not attributes:
                # use the first row to get the list of attributes
                attributes = row
                continue
            props = dict(zip(attributes, row))
            geodata = graph.GeoData.nodes.get_or_none(
                **{attributes[0]: row[0]})
            if not geodata:
                # create a new one
                graph.GeoData(**props).save()
            else:
                # check if an update is needed
                for key, value in props.items():
                    if getattr(geodata, key) != value:
                        setattr(geodata, key, value)
                        geodata.save()
    log.info("GeoData nodes successfully created")
def get(self, user: User) -> Response:
    """Return graph-wide statistics, including per-group dataset breakdowns."""
    graph = neo4j.get_instance()

    data: Dict[str, Union[int, Dict[str, int]]] = {
        "num_users": count_nodes(graph, "count_users"),
        "num_studies": count_nodes(graph, "count_studies"),
        "num_datasets": count_nodes(graph, "count_datasets"),
        "num_datasets_per_group": count_by_group(graph, "count_datasets"),
        "num_datasets_with_vcf": count_nodes(graph, "count_dataset_with_vcf"),
        "num_datasets_with_vcf_per_group": count_by_group(
            graph, "count_dataset_with_vcf"
        ),
        "num_datasets_with_gvcf": count_nodes(graph, "count_dataset_with_gvcf"),
        "num_datasets_with_gvcf_per_group": count_by_group(
            graph, "count_dataset_with_gvcf"
        ),
        "num_files": count_nodes(graph, "count_files"),
    }

    return self.response(data)
def verify_phenotype_access(
    endpoint: NIGEndpoint, uuid: str, user: User
) -> Dict[str, Any]:
    """Resolve a phenotype by uuid and verify access to its parent study.

    Returns a dict with the phenotype node and its study.
    """
    graph = neo4j.get_instance()

    node = graph.Phenotype.nodes.get_or_none(uuid=uuid)
    if node is None:
        raise NotFound(PHENOTYPE_NOT_FOUND)

    parent = node.defined_in.single()
    endpoint.verifyStudyAccess(parent, user=user, error_type="Phenotype")

    return {"phenotype": node, "study": parent}
def get(self, uuid: str, user: User) -> Response:
    """Return a single study after verifying read access."""
    study = neo4j.get_instance().Study.nodes.get_or_none(uuid=uuid)
    self.verifyStudyAccess(study, user=user, read=True)

    self.log_event(self.events.access, study)
    return self.response(study)
def put(self, uuid: str, filename: str, user: User) -> Response: graph = neo4j.get_instance() # check permission dataset = graph.Dataset.nodes.get_or_none(uuid=uuid) self.verifyDatasetAccess(dataset, user=user) study = dataset.parent_study.single() self.verifyStudyAccess(study, user=user, error_type="Dataset") path = self.getPath(user=user, dataset=dataset) completed, response = self.chunk_upload(Path(path), filename) log.debug("check {}", response) if completed: # get the file file = None for f in dataset.files.all(): if f.name == filename: file = f if not file: raise NotFound(FILE_NOT_FOUND) # check the final size filepath = self.getPath(user=user, file=file) filesize = filepath.stat().st_size # check the final size if filesize != file.size: log.debug( "size expected: {},actual size: {}", file.size, filesize, ) file.delete() graph.db.commit() filepath.unlink() raise ServerError( "File has not been uploaded correctly: final size does not " "correspond to total size. Please try a new upload", ) # check the content of the file file_validation = validate_gzipped_fastq(filepath) if not file_validation[0]: # delete the file file.delete() graph.db.commit() filepath.unlink() raise BadRequest(file_validation[1]) file.status = "uploaded" file.save() self.log_event( self.events.create, file, {filename: f"Upload completed in dataset {uuid}"}, ) return response
def verify_dataset_access(
    endpoint: NIGEndpoint, uuid: str, user: User
) -> Dict[str, Any]:
    """Resolve a dataset by uuid and verify access to it and its study.

    Returns a dict with the dataset node and its parent study.
    """
    graph = neo4j.get_instance()

    node = graph.Dataset.nodes.get_or_none(uuid=uuid)
    endpoint.verifyDatasetAccess(node, user=user)

    parent = node.parent_study.single()
    endpoint.verifyStudyAccess(parent, user=user, error_type="Dataset")

    return {"dataset": node, "study": parent}
def delete(self, uuid1: str, uuid2: str, user: User) -> Response:
    """Remove the parent/child relationship between two phenotypes.

    Handles both directions: uuid1 having uuid2 as father/mother, or
    uuid2 having uuid1 as parent; the mirrored ``son`` relationship is
    removed together with the father/mother one.

    Raises:
        NotFound: when either phenotype does not exist.
    """
    graph = neo4j.get_instance()
    phenotype1 = graph.Phenotype.nodes.get_or_none(uuid=uuid1)
    if phenotype1 is None:
        raise NotFound(PHENOTYPE_NOT_FOUND)
    study = phenotype1.defined_in.single()
    self.verifyStudyAccess(study, user=user, error_type="Phenotype", read=False)
    phenotype2 = graph.Phenotype.nodes.get_or_none(uuid=uuid2)
    if phenotype2 is None:
        raise NotFound(PHENOTYPE_NOT_FOUND)
    study = phenotype2.defined_in.single()
    self.verifyStudyAccess(study, user=user, error_type="Phenotype", read=False)
    # [1] - FATHER -> [2]
    if phenotype1.father.is_connected(phenotype2):
        phenotype1.father.disconnect(phenotype2)
        # delete son relationship
        phenotype2.son.disconnect(phenotype1)
    # [1] - MOTHER -> [2]
    elif phenotype1.mother.is_connected(phenotype2):
        phenotype1.mother.disconnect(phenotype2)
        # delete son relationship
        phenotype2.son.disconnect(phenotype1)
    # [1] <- FATHER - [2] _or_ [1] <- MOTHER - [2]
    elif phenotype1.son.is_connected(phenotype2):
        phenotype1.son.disconnect(phenotype2)
        # delete mother or father relationship
        if phenotype2.mother.is_connected(phenotype1):
            phenotype2.mother.disconnect(phenotype1)
        if phenotype2.father.is_connected(phenotype1):
            phenotype2.father.disconnect(phenotype1)
    self.log_event(
        self.events.modify,
        phenotype1,
        {
            "relationship": "removed",
            "target": phenotype2.uuid
        },
    )
    return self.empty_response()
def put(self, uuid: str, user: User, **kwargs: Any) -> Response:
    """Update a study's properties with the provided keyword arguments."""
    graph = neo4j.get_instance()

    study = graph.Study.nodes.get_or_none(uuid=uuid)
    self.verifyStudyAccess(study, user=user)

    graph.update_properties(study, kwargs)
    study.save()

    self.log_event(self.events.modify, study, kwargs)
    return self.empty_response()
def post(self, uuid1: str, uuid2: str, user: User) -> Response:
    """Connect phenotype ``uuid2`` as parent (father or mother) of ``uuid1``.

    The parent role is derived from phenotype2's sex; the mirrored
    ``son`` relationship is created as well.

    Raises:
        BadRequest: when uuid1 == uuid2 or phenotype2's sex is not
            "male"/"female".
        NotFound: when either phenotype does not exist.
    """
    graph = neo4j.get_instance()

    if uuid1 == uuid2:
        raise BadRequest(
            f"Cannot set relationship between {uuid1} and itself")

    phenotype1 = graph.Phenotype.nodes.get_or_none(uuid=uuid1)
    if phenotype1 is None:
        raise NotFound(PHENOTYPE_NOT_FOUND)
    study = phenotype1.defined_in.single()
    self.verifyStudyAccess(study, user=user, error_type="Phenotype", read=False)

    phenotype2 = graph.Phenotype.nodes.get_or_none(uuid=uuid2)
    if phenotype2 is None:
        raise NotFound(PHENOTYPE_NOT_FOUND)
    study = phenotype2.defined_in.single()
    self.verifyStudyAccess(study, user=user, error_type="Phenotype", read=False)

    # check parent sex
    if phenotype2.sex == "male":
        relationship = "father"
        phenotype2.son.connect(phenotype1)
        phenotype1.father.connect(phenotype2)
    elif phenotype2.sex == "female":
        relationship = "mother"
        phenotype2.son.connect(phenotype1)
        phenotype1.mother.connect(phenotype2)
    else:
        # previously this fell through with ``relationship`` unbound,
        # raising UnboundLocalError (a 500); fail explicitly instead
        raise BadRequest(f"Cannot set relationship: {uuid2} has no valid sex")

    self.log_event(
        self.events.modify,
        phenotype1,
        {
            "relationship": relationship,
            "target": phenotype2.uuid
        },
    )
    res = {"uuid": phenotype2.uuid, "name": phenotype2.name}
    return self.response(res)
def get(self, uuid: str, user: User) -> Response:
    """List all technical metadata nodes defined in a study."""
    graph = neo4j.get_instance()

    study = graph.Study.nodes.get_or_none(uuid=uuid)
    self.verifyStudyAccess(study, user=user, read=True)

    data = list(study.technicals.all())
    return self.response(data)
def get(self) -> Response:
    """Return public (unauthenticated) statistics about the graph."""
    graph = neo4j.get_instance()
    # counts are computed by predefined cypher queries named by key
    data: Dict[str, int] = {}
    data["num_users"] = count_nodes(graph, "count_users")
    data["num_studies"] = count_nodes(graph, "count_studies")
    data["num_datasets"] = count_nodes(graph, "count_datasets")
    data["num_datasets_with_vcf"] = count_nodes(graph, "count_dataset_with_vcf")
    data["num_files"] = count_nodes(graph, "count_files")
    return self.response(data)
def verify_indexes(label: str, key: str) -> None:
    """Ensure a single-property neo4j index exists on ``label``.``key``.

    Raises:
        ValueError: when no matching index is found.
    """
    graph = neo4j.get_instance()
    indexes = graph.cypher("CALL db.indexes()")
    for index in indexes:
        # db.indexes() row layout: column 7 = labelsOrTypes, column 8 = properties
        labelsOrTypes = index[7]
        properties = index[8]
        # only single-label, single-property indexes are considered
        if len(labelsOrTypes) == 1 and len(properties) == 1:
            if labelsOrTypes[0] == label and properties[0] == key:
                log.debug("Found an index for {}.{}", label, key)
                break
    else:
        # for/else: reached only when the loop completed without break
        raise ValueError(
            f"Can't find an index for {label}.{key}: "
            "add an index or skip this check with ignore_indexes=True")
def get(self, uuid: str, user: User) -> Response:
    """Return a single dataset after verifying dataset and study access."""
    graph = neo4j.get_instance()

    dataset = graph.Dataset.nodes.get_or_none(uuid=uuid)
    self.verifyDatasetAccess(dataset, user=user, read=True)

    study = dataset.parent_study.single()
    self.verifyStudyAccess(study, user=user, error_type="Dataset", read=True)

    self.log_event(self.events.access, dataset)
    return self.response(dataset)
def delete(self, uuid: str, user: User) -> Response:
    """Delete a phenotype node after verifying write access to its study."""
    graph = neo4j.get_instance()

    phenotype = graph.Phenotype.nodes.get_or_none(uuid=uuid)
    if phenotype is None:
        raise NotFound(PHENOTYPE_NOT_FOUND)

    study = phenotype.defined_in.single()
    self.verifyStudyAccess(study, user=user, error_type="Phenotype")

    phenotype.delete()

    self.log_event(self.events.delete, phenotype)
    return self.empty_response()
def delete(self, uuid: str, user: User) -> Response:
    """Delete a technical metadata node after verifying study access."""
    graph = neo4j.get_instance()

    techmeta = graph.TechnicalMetadata.nodes.get_or_none(uuid=uuid)
    if techmeta is None:
        raise NotFound(TECHMETA_NOT_FOUND)

    study = techmeta.defined_in.single()
    self.verifyStudyAccess(study, user=user, error_type="Technical Metadata")

    techmeta.delete()

    self.log_event(self.events.delete, techmeta)
    return self.empty_response()
def get(self, uuid: str, user: User) -> Response:
    """Return a single technical metadata node after a read-access check."""
    graph = neo4j.get_instance()

    techmeta = graph.TechnicalMetadata.nodes.get_or_none(uuid=uuid)
    if not techmeta:
        raise NotFound(TECHMETA_NOT_FOUND)

    study = techmeta.defined_in.single()
    self.verifyStudyAccess(
        study, user=user, error_type="Technical Metadata", read=True
    )

    self.log_event(self.events.access, techmeta)
    return self.response(techmeta)
def post(
    self,
    uuid: str,
    name: str,
    description: str,
    # should be an instance of neo4j.Study,
    # but typing is still not working with neomodel
    study: Any,
    user: User,
    phenotype: Optional[str] = None,
    technical: Optional[str] = None,
) -> Response:
    """Create a dataset in ``study``, optionally linked to a phenotype
    and/or a technical metadata node (given by uuid), and create its
    folder on disk.

    Raises:
        NotFound: when a provided phenotype/technical uuid does not
            belong to the study.
        Conflict: when the dataset folder already exists.
    """
    graph = neo4j.get_instance()

    kwargs = {"name": name, "description": description}
    dataset = graph.Dataset(**kwargs).save()
    dataset.ownership.connect(user)
    dataset.parent_study.connect(study)

    if phenotype:
        kwargs["phenotype"] = phenotype
        # resolve the uuid into a node without shadowing the str parameter
        phenotype_node = study.phenotypes.get_or_none(uuid=phenotype)
        if phenotype_node is None:  # pragma: no cover
            raise NotFound(PHENOTYPE_NOT_FOUND)
        dataset.phenotype.connect(phenotype_node)

    if technical:
        kwargs["technical"] = technical
        technical_node = study.technicals.get_or_none(uuid=technical)
        if technical_node is None:  # pragma: no cover
            raise NotFound(TECHMETA_NOT_FOUND)
        dataset.technical.connect(technical_node)

    path = self.getPath(user=user, dataset=dataset)
    try:
        path.mkdir(parents=True, exist_ok=False)
    # Almost impossible: the same uuid was already used for another dataset
    except FileExistsError as exc:  # pragma: no cover
        # roll back the node so graph and filesystem stay consistent
        dataset.delete()
        raise Conflict(str(exc)) from exc

    self.log_event(self.events.create, dataset, kwargs)
    return self.response(dataset.uuid)
def getInputSchema(request: FlaskRequest, is_post: bool) -> Type[Schema]:
    """Build the phenotype-definition input schema dynamically.

    Birth-place choices are loaded from the GeoData nodes; when exactly
    one choice exists it is used as the default.
    """
    graph = neo4j.get_instance()

    # as defined in Marshmallow.schema.from_dict
    attributes: Dict[str, Union[fields.Field, type]] = {
        "name": fields.Str(required=True),
        "age": fields.Integer(allow_none=True, validate=validate.Range(min=0)),
        "sex": fields.Str(
            required=True, validate=validate.OneOf(SEX), metadata={"description": ""}
        ),
        "hpo": fields.List(
            fields.Str(),
            metadata={
                "label": "HPO",
                "autocomplete_endpoint": "/api/hpo",
                "autocomplete_show_id": True,
                "autocomplete_id_bind": "hpo_id",
                "autocomplete_label_bind": "label",
            },
        ),
    }

    geodata_keys = []
    geodata_labels = []
    for g in graph.GeoData.nodes.all():
        geodata_keys.append(g.uuid)
        geodata_labels.append(g.province)

    # preselect the birth place only when there is exactly one choice
    default_geodata = geodata_keys[0] if len(geodata_keys) == 1 else None

    attributes["birth_place"] = fields.Str(
        required=False,
        allow_none=True,
        metadata={
            "label": "Birth Place",
            "description": "",
        },
        dump_default=default_geodata,
        validate=validate.OneOf(choices=geodata_keys, labels=geodata_labels),
    )

    return Schema.from_dict(attributes, name="PhenotypeDefinition")
def post(self, uuid: str, name: str, user: User, **kwargs: Any) -> Response:
    """Register a new fastq.gz file in a dataset and start a chunked upload.

    Raises:
        BadRequest: when the filename does not match
            SampleName_R1/R2.fastq.gz.
    """
    # check permissions
    graph = neo4j.get_instance()
    dataset = graph.Dataset.nodes.get_or_none(uuid=uuid)
    self.verifyDatasetAccess(dataset, user=user)
    study = dataset.parent_study.single()
    self.verifyStudyAccess(study, user=user, error_type="Dataset")
    path = self.getPath(user=user, dataset=dataset)

    # check if the filename is correct.
    # Dots are escaped and fullmatch anchors both ends: the previous
    # pattern (re.match with bare dots) accepted names such as
    # "sample_R1xfastqxgz" or valid names with trailing garbage
    name_pattern = r"([a-zA-Z0-9_-]+)_(R[12])\.fastq\.gz"
    if not re.fullmatch(name_pattern, name):
        raise BadRequest(
            "Filename does not follow the correct naming convention: "
            "SampleName_R1/R2.fastq.gz")

    # set the allowed file format
    self.set_allowed_exts(["gz"])
    properties = {
        "name": name,
        "size": kwargs["size"],
        # Currently fixed
        "type": "fastq.gz",
        "status": "importing",
    }
    file = graph.File(**properties).save()
    file.dataset.connect(dataset)

    self.log_event(
        self.events.create,
        file,
        {
            "operation": f"Accepted upload for {name} file in {uuid} dataset"
        },
    )
    return self.init_chunk_upload(Path(path), name, force=False)
def post(self, uuid: str, user: User, **kwargs: Any) -> Response:
    """Create a technical metadata node attached to a study."""
    graph = neo4j.get_instance()

    study = graph.Study.nodes.get_or_none(uuid=uuid)
    self.verifyStudyAccess(study, user=user)

    techmeta = graph.TechnicalMetadata(**kwargs).save()
    techmeta.defined_in.connect(study)

    self.log_event(self.events.create, techmeta, kwargs)
    return self.response(techmeta.uuid)
def post(self, user: User, **kwargs: Any) -> Response:
    """Create a new study owned by ``user`` and its folder on disk.

    Raises:
        Conflict: when the study folder already exists.
    """
    graph = neo4j.get_instance()

    study = graph.Study(**kwargs).save()
    study.ownership.connect(user)

    path = self.getPath(user=user, study=study)
    try:
        path.mkdir(parents=True, exist_ok=False)
    except FileExistsError as exc:  # pragma: no cover
        # Almost impossible: the same uuid was already used for another study.
        # Roll back the node so graph and filesystem stay consistent
        study.delete()
        raise Conflict(str(exc)) from exc

    self.log_event(self.events.create, study, kwargs)
    return self.response(study.uuid)
def put(self, uuid: str, user: User, **kwargs: Any) -> Response:
    """Update a technical metadata node with the provided properties."""
    graph = neo4j.get_instance()

    techmeta = graph.TechnicalMetadata.nodes.get_or_none(uuid=uuid)
    if techmeta is None:
        raise NotFound(TECHMETA_NOT_FOUND)

    study = techmeta.defined_in.single()
    self.verifyStudyAccess(study, user=user, error_type="Technical Metadata")

    graph.update_properties(techmeta, kwargs)
    techmeta.save()

    self.log_event(self.events.modify, techmeta, kwargs)
    return self.empty_response()