Ejemplo n.º 1
0
"""

# saving updated schema.org schema
se.export_schema(os.path.join(schema_path, output_schema_name + ".jsonld"))
"""
######################################################
# Generating JSONSchema schema from schema.org schema
######################################################
"""
'''
To generate JSONSchema schema for validation based on this schema.org schema
run ./schema_generator.py; just point it to the output schema above or invoke 
directly the JSONSchema generation method as shown below
'''

from schema_generator import get_JSONSchema_requirements

# see schema_generator.py for more details on parameters

#JSONSchema name
json_schema_name = "scRNASeqJSONSchema"

json_schema = get_JSONSchema_requirements(se,
                                          "scRNASeq",
                                          schema_name=json_schema_name)

# store the JSONSchema schema
with open(os.path.join(schema_path, json_schema_name + ".json"), "w") as s_f:
    json.dump(json_schema, s_f,
              indent=3)  # adjust indent - lower for more compact schemas
"""
'''
To generate JSONSchema schema for validation based on this schema.org schema
run ./schema_generator.py; just point it to the output schema above or invoke 
directly the JSONSchema generation method as shown below
'''

from schema_generator import get_JSONSchema_requirements

# see schema_generator.py for more details on parameters

#JSONSchema name
json_schema_name = "exampleJSONSchema"

json_schema = get_JSONSchema_requirements(se,
                                          "Thing",
                                          schema_name=json_schema_name)

# store the JSONSchema schema
with open(os.path.join(schema_path, json_schema_name + ".json"), "w") as s_f:
    json.dump(json_schema, s_f,
              indent=3)  # adjust indent - lower for more compact schemas
"""
######################################################
# Generating annotations manifest from schema.org schema
######################################################
"""

from manifest_generator import get_manifest

print("==========================")
Ejemplo n.º 3
0
    def getManifest(self):
        """Generate a Google Sheets annotations manifest for the root node.

        Builds credentials, creates an empty spreadsheet titled ``self.title``,
        derives the manifest columns from the JSONSchema generated for
        ``self.root``, writes them as the header row, fills in any
        ``self.additionalMetadata`` columns, and attaches a dropdown
        (``ONE_OF_LIST`` data validation) to every column that has allowed
        values in the schema.

        Columns are collected in this order: the schema's ``required``
        fields, then fields named by conditional (``allOf``) ``if``/``then``
        requirements, then any extra keys of ``self.additionalMetadata``.

        Returns:
            str: URL of the generated spreadsheet.
        """

        self.buildCredentials()

        spreadsheetId = self._createEmptyManifestSpreadsheet(self.title)

        json_schema = get_JSONSchema_requirements(self.se, self.root,
                                                  self.title)

        root_properties = json_schema.get("properties", {})

        def allowed_values(field, fallback_properties=None):
            # Look the field up in the root properties first, then in the
            # optional fallback properties. A property without an "enum"
            # (no value constraint) yields an empty list instead of raising
            # KeyError, so the column is still created, just without a dropdown.
            for properties in (root_properties, fallback_properties or {}):
                if field in properties:
                    return properties[field].get("enum", [])
            return []

        # column name -> list of allowed values (empty list: unconstrained)
        required_metadata_fields = {}

        # gathering dependency requirements and corresponding allowed values
        # constraints for the root node
        for req in json_schema.get("required", []):
            required_metadata_fields[req] = allowed_values(req)

        # gathering dependency requirements and allowed value constraints for
        # conditional dependencies, if any
        for conditional_reqs in json_schema.get("allOf", []):
            condition = conditional_reqs["if"]
            if "required" not in condition:
                continue

            condition_properties = condition.get("properties", {})
            for req in condition["required"]:
                if (req in condition_properties
                        and req not in required_metadata_fields):
                    required_metadata_fields[req] = allowed_values(
                        req, condition_properties)

            for req in conditional_reqs["then"]["required"]:
                if req not in required_metadata_fields:
                    required_metadata_fields[req] = allowed_values(req)

        # if additional metadata is provided, append its columns (unless
        # those columns exist already)
        if self.additionalMetadata:
            for column in self.additionalMetadata:
                required_metadata_fields.setdefault(column, [])

        # adding columns (header row)
        # NOTE(review): the header range is built from len(...) while the data
        # columns below use the 0-based enumerate index; confirm which
        # indexing _columnToLetter expects.
        end_col = len(required_metadata_fields)
        end_col_letter = self._columnToLetter(end_col)

        header_range = "Sheet1!A1:" + str(end_col_letter) + "1"
        body = {"values": [list(required_metadata_fields)]}

        self.sheetService.spreadsheets().values().update(
            spreadsheetId=spreadsheetId,
            range=header_range,
            valueInputOption="RAW",
            body=body).execute()

        # adding additional metadata values if needed and adding
        # value-constraints from the data model as dropdowns
        for i, (req, allowed) in enumerate(required_metadata_fields.items()):

            # adding additional metadata if needed; such columns get their
            # values written from row 2 down and receive no dropdown
            if self.additionalMetadata and req in self.additionalMetadata:

                column_values = self.additionalMetadata[req]
                target_col_letter = self._columnToLetter(i)

                body = {"majorDimension": "COLUMNS", "values": [column_values]}

                self.sheetService.spreadsheets().values().update(
                    spreadsheetId=spreadsheetId,
                    range=target_col_letter + '2:' + target_col_letter +
                    str(len(column_values) + 1),
                    valueInputOption="RAW",
                    body=body).execute()

                continue

            # adding value-constraints if any (falsy entries are skipped)
            req_vals = [{
                "userEnteredValue": value
            } for value in allowed if value]

            if not req_vals:
                continue

            # validation applies to column i from the second row (index 1)
            # downwards; no end row is given
            body = {
                "requests": [{
                    'setDataValidation': {
                        'range': {
                            'startRowIndex': 1,
                            'startColumnIndex': i,
                            'endColumnIndex': i + 1,
                        },
                        'rule': {
                            'condition': {
                                'type': 'ONE_OF_LIST',
                                'values': req_vals
                            },
                            'inputMessage': 'Choose one from dropdown',
                            'strict': True,
                            'showCustomUi': True
                        }
                    }
                }]
            }

            self.sheetService.spreadsheets().batchUpdate(
                spreadsheetId=spreadsheetId, body=body).execute()

        # setting up spreadsheet permissions (per the original note: set up so
        # that anyone with the link can edit - confirm in _setPermissions)
        self._setPermissions(spreadsheetId)

        manifestUrl = "https://docs.google.com/spreadsheets/d/" + spreadsheetId

        print("==========================")
        print("Manifest successfully generated from schema!")
        print("URL: " + manifestUrl)
        print("==========================")

        return manifestUrl
"""

# saving updated schema.org schema as <schema_path>/<output_schema_name>.jsonld
se.export_schema(os.path.join(schema_path, output_schema_name + ".jsonld"))


"""
######################################################
# Generating JSONSchema schema from schema.org schema
######################################################
"""

'''
To generate JSONSchema schema for validation based on this schema.org schema
run ./schema_generator.py; just point it to the output schema above or invoke 
directly the JSONSchema generation method as shown below
'''

from schema_generator import get_JSONSchema_requirements 

# see schema_generator.py for more details on parameters

# JSONSchema name; also used below as the base name of the output .json file
json_schema_name = "minimalHTAPPJSONSchema"

# generate the JSONSchema for the "HTAPP" node of `se`
# (presumably the root node of the model - confirm in schema_generator.py)
json_schema = get_JSONSchema_requirements(se, "HTAPP", schema_name = json_schema_name)

# store the JSONSchema schema as <schema_path>/<json_schema_name>.json
with open(os.path.join(schema_path, json_schema_name + ".json"), "w") as s_f:
    json.dump(json_schema, s_f, indent = 3) # adjust indent - lower for more compact schemas
Ejemplo n.º 5
0
    def validateModelManifest(self, manifestPath: str, rootNode: str) -> list:
        """Validate each row of an annotations manifest against the data model.

        A JSONSchema is generated for ``rootNode`` and every manifest row is
        validated against it as one JSON record. Empty cells are read as
        empty strings.

        Args:
            manifestPath: a path to the manifest csv file containing annotations
            rootNode: a schema node label (i.e. term)

        Returns:
            A list with one ``(errorRow, errorTerms, errorValue,
            allowedValues)`` tuple per invalid row; empty if every row
            validates. Only the error raised by ``validate`` is reported
            for a row.

            - errorRow: row in the manifest where the error occurred
              (record index + 2).
            - errorTerms: first term named in the bracketed part of the
              second-to-last line of the error text, or ``[]`` if that
              line has no brackets.
            - errorValue: first single-quoted token in the first line of
              the error text, or ``""`` if there is none.
            - allowedValues: entries of the first bracketed list in the first
              line of the error text, or ``[]`` if there is none.

        Raises: TODO
            NOTE(review): the original docstring lists ``ValueError`` for a
            rootNode missing from the metadata model; nothing in this method
            raises it directly - confirm in get_JSONSchema_requirements.
        """

        # get validation schema for a given node in the data model
        jsonSchema = get_JSONSchema_requirements(self.se, rootNode,
                                                 rootNode + "_validation")

        # get annotations from manifest (array of json annotations
        # corresponding to manifest rows)
        manifest = pd.read_csv(manifestPath).fillna("")
        annotations = json.loads(manifest.to_json(orient='records'))

        # compiled once, outside the loop; raw strings avoid the invalid
        # "\[" escape sequence the non-raw literals contained
        listExp = re.compile(r'\[(.*?)\]')
        stringExp = re.compile(r"'(.*?)'")

        errorPositions = []
        for i, annotation in enumerate(annotations):

            try:
                validate(instance=annotation, schema=jsonSchema)
            # this error parsing is too brittle; if something changes in the
            # validator code outputting the validation error we'd have to
            # change the logic; TODO: provide a more robust error parsing
            except ValidationError as e:
                errorRow = i + 2  # row in the manifest where the error occurred

                errors = str(e).split("\n")
                errorSummary = errors[0]

                # extract wrong value entered; fall back to an empty string
                # when the message quotes nothing instead of raising IndexError
                quotedTokens = stringExp.findall(errorSummary)
                errorValue = quotedTokens[0] if quotedTokens else ""

                # extract allowed values, if any, for the term that was
                # erroneously filled in
                allowedValues = listExp.findall(errorSummary)
                if allowedValues:
                    allowedValues = allowedValues[0].replace("'",
                                                             '').split(", ")

                # the term is read from the second-to-last line of the error
                # text; guard against single-line errors
                errorDetail = errors[-2] if len(errors) > 1 else ""

                # extract the term(s) that had erroneously filled in values,
                # if any
                errorTerms = listExp.findall(errorDetail)
                if errorTerms:
                    errorTerms = errorTerms[0].replace("'", '').split(", ")[0]

                errorPositions.append(
                    (errorRow, errorTerms, errorValue, allowedValues))

        # kept from the original: the collected errors are echoed to stdout
        print(errorPositions)
        return errorPositions