def load_schemaorg_model(model_path, view=True):
    """Load a schema.org model into a SchemaExplorer and render its graph.

    Args:
        model_path: location of the schema; passed unchanged to
            ``SchemaExplorer.load_schema``.
        view: forwarded as the ``view`` argument of ``render``. Defaults to
            ``True``, the value that used to be hard-coded, so existing
            callers behave as before. Presumably ``False`` renders the file
            without opening it (confirm against the rendering library).

    Returns:
        The SchemaExplorer instance with the schema loaded.
    """
    # instantiate schema explorer
    se = SchemaExplorer()
    se.load_schema(model_path)

    # visualize loaded schema
    full_schema = se.full_schema_graph()
    full_schema.engine = "fdp"
    # The output name is a bare file name, so no basename() call is needed.
    full_schema.render(filename="schema.org.model.pdf", view=view)

    return se
Beispiel #2
0
    def __init__(
        self,
        inputMModelLocation: str,
        inputMModelLocationType: str,
    ) -> None:
        """ Instantiates MetadataModel object

        A new SchemaExplorer is created internally (it is not a constructor
        argument), the location and location type are stored on the
        instance, and loadMModel() is called before the constructor returns.

        Args: 
          inputMModelLocation:  local path, uri, synapse entity id; (e.g. gs://, syn123, /User/x/…); present location
          inputMModelLocationType: one of [local, gs, aws, synapse]; present location type
        """

        # every instance gets its own, freshly constructed explorer
        self.se = SchemaExplorer()

        self.inputMModelLocationType = inputMModelLocationType
        self.inputMModelLocation = inputMModelLocation

        # load the schema now that the location attributes are set
        self.loadMModel()
Beispiel #3
0
        value_constraint = {
            'rdfs:requiresChildAsValue': {
                '@id': 'sms:' + str(requires_value)
            }
        }
        class_attributes.update(value_constraint)

    return class_attributes


# Location of the schema metadata (used for output or input).
schema_path = "./schemas"
output_schema_name = "scRNASeq"

# Create the schema explorer the classes below are registered with.
se = SchemaExplorer()

# ------------------------------------------------------
# First add the classes w/o dependencies to the schema.
# ------------------------------------------------------

# Add children classes to the Biosample class in biothing.
class_req_add = get_class(
    "BiosampleType",
    description="The type of source material for the biosample",
    subclass_of=["Biosample"],
    requires_value=True,
)
se.update_class(class_req_add)
        value_constraint = {
            'rdfs:requiresChildAsValue': {
                '@id': 'sms:' + str(requires_value)
            }
        }
        class_attributes.update(value_constraint)

    return class_attributes


# Location of the schema metadata (used for output or input).
schema_path = "./schemas"
output_schema_name = "exampleSchemaReq"

# Create the schema explorer the classes below are registered with.
se = SchemaExplorer()

# ------------------------------------------------------
# First add the classes w/o dependencies to the schema.
# ------------------------------------------------------

# Add fileFormat as a child of Thing.
class_req_add = get_class(
    "fileFormat",
    description="Defined format of the data file, typically corresponding to extension, but sometimes indicating more general group of files produced by the same tool or software",
    subclass_of="Thing",
)
se.update_class(class_req_add)
'''
adding resourceType as a child of Thing
    #'http://schema.org/domainIncludes':{'@id': 'bts:' + property_class_name},
    #'http://schema.org/rangeIncludes':{'@id': 'schema:' + allowed_values},

    return new_property


def first_upper(s):
    """Return ``s`` with its first character upper-cased.

    The remainder of the string is left untouched; an empty input is
    returned as-is.
    """
    if len(s) == 0:
        return s
    head, tail = s[0], s[1:]
    return head.upper() + tail


# Input locations for the annotations and the base Sage schema.
annotations_path = "./data"
annotations_file = "psychENCODE.json"
base_sage_schema_file = "masterSage.jsonld"

# Create the schema explorer; it is rendered before any schema file is
# loaded explicitly.
se = SchemaExplorer()

# Render the BioThings schema.
print("Visualizing BioThings schema...")
full_schema = se.full_schema_graph()
full_schema.render(
    filename=os.path.join(annotations_path, "biothings_schema.pdf"),
    view=True,
)
print("Done")

# Load the Sage annotations that have been converted to JSON-LD. A large set
# has already been converted, but some annotation subsets are still missing.
se.load_schema(os.path.join(annotations_path, base_sage_schema_file))

# visualize default base Sage schema
    }

    return class_attributes


def first_upper(s):
    """Capitalise only the leading character of ``s``.

    Everything after the first character is preserved verbatim, and an
    empty ``s`` is handed back unchanged.
    """
    if len(s) > 0:
        initial = s[0].upper()
        return initial + s[1:]
    return s


# Location of the Synapse annotations and the base schema file.
annotations_path = "./data"
annotations_file = "sageCommunity.json"
base_schema_org_file = "experimentalData.jsonld"

# Create the schema explorer and load the base schema into it.
se = SchemaExplorer()
se.load_schema(os.path.join(annotations_path, base_schema_org_file))

# Render the default schema.
# NOTE(review): the output name is the annotations file name with the suffix
# appended directly (no separator in between) -- confirm this is intended.
full_schema = se.full_schema_graph()
full_schema.render(
    filename=os.path.join(
        annotations_path,
        annotations_file + "biothings_schema.gv.pdf",
    ),
    view=True,
)

# add adhoc classes; TODO: this should be generated based on a metadata model schema
'''
# experimentalData classes
new_class = get_class("Assay",\
          description = "The technology used to generate the data in this file",\
          subclass_of = "Thing"\
)
    if requires_value != None:
        value_constraint = {'rdfs:requiresChildAsValue':{'@id':'sms:' +  str(requires_value)}}
        class_attributes.update(value_constraint)
    
    return class_attributes



# Location of the schema metadata (used for output or input).
schema_path = "./schemas"
output_schema_name = "HTAPP"

# Create the schema explorer the classes below are registered with.
se = SchemaExplorer()

# ------------------------------------------------------
# First add the classes w/o dependencies to the schema.
# ------------------------------------------------------
class_req_add = get_class(
    "HTAPP",
    description="HTAPP minimal metadata extension",
    subclass_of=["Thing"],
)
se.update_class(class_req_add)


class_req_add = get_class("HTANParticipantID",\
Beispiel #8
0
class MetadataModel(object):
    """Metadata model wrapper around schema.org specification graph.

    Provides basic utilities to

    1) manipulate the metadata model;
    2) generate metadata model views:
       - generate manifest view of the metadata model
       - usage getModelManifest(rootNode)

       - generate validation schemas view of the metadata model;
       - TODO: not currently part of the specification; to be defined.
    """

    # Patterns used to pick values out of the validator's textual error
    # report. Raw strings avoid invalid-escape-sequence warnings; they are
    # kept as plain strings so nothing is needed at class-creation time.
    _BRACKETED_PATTERN = r"\[(.*?)\]"
    _QUOTED_PATTERN = r"'(.*?)'"

    def __init__(
        self,
        inputMModelLocation: str,
        inputMModelLocationType: str,
    ) -> None:
        """Instantiate a MetadataModel and load its schema.

        A new SchemaExplorer is created internally (it is not a constructor
        argument) and the schema at ``inputMModelLocation`` is loaded via
        loadMModel() before the constructor returns.

        Args:
          inputMModelLocation: local path, uri, synapse entity id; (e.g. gs://, syn123, /User/x/…); present location
          inputMModelLocationType: one of [local, gs, aws, synapse]; present location type
        """

        self.se = SchemaExplorer()

        self.inputMModelLocationType = inputMModelLocationType
        self.inputMModelLocation = inputMModelLocation

        self.loadMModel()

    # setting mutators/accessors methods explicitly

    @property
    def inputMModelLocation(self) -> str:
        """Gets or sets the inputMModelLocation path"""
        return self.__inputMModelLocation

    @inputMModelLocation.setter
    def inputMModelLocation(self, inputMModelLocation) -> None:
        self.__inputMModelLocation = inputMModelLocation

    @property
    def inputMModelLocationType(self) -> str:
        """Gets or sets the inputMModelLocationType"""
        return self.__inputMModelLocationType

    @inputMModelLocationType.setter
    def inputMModelLocationType(self, inputMModelLocationType) -> None:
        self.__inputMModelLocationType = inputMModelLocationType

    @property
    def se(self) -> SchemaExplorer:
        """Gets or sets the SchemaExplorer instance"""
        return self.__se

    @se.setter
    def se(self, se: SchemaExplorer) -> None:
        self.__se = se

    # business logic: expose metadata model "views" depending on "controller" logic
    # (somewhat analogous to Model View Controller pattern for GUI/web applications)
    # i.e. jsonschemas, annotation manifests, metadata/annotation dictionary web explorer
    # are all "views" of the metadata model.
    # The "business logic" in this MetadataModel class provides functions exposing relevant parts
    # of the metadata model needed so that these views can be generated by user facing components;
    # controller components are (loosely speaking) responsible for handling the interaction between views and the model
    # some of these components right now reside in the Bundle class

    def loadMModel(self) -> None:
        """Load the schema at ``inputMModelLocation`` into the SchemaExplorer.

        Only the location is used here; ``inputMModelLocationType`` is not
        consulted by this method.
        """

        self.se.load_schema(self.inputMModelLocation)

    def getModelSubgraph(self, rootNode: str, subgraphType: str) -> nx.DiGraph:
        """Get a schema subgraph from rootNode descendants on edges/node properties of type subgraphType.

        NOTE: not implemented yet -- the body is a placeholder and the
        method currently returns None. The contract below is the intended one.

        Args:
          rootNode: a schema node label (i.e. term)
          subgraphType: the kind of subgraph to traverse (i.e. based on node properties or edge labels)

        Returns: a directed graph (networkx DiGraph) subgraph of the metadata model w/ vertex set root node descendants

        Raises:
            ValueError: rootNode not found in metadata model.
        """
        pass

    def getModelManifest(self, rootNode: str, filenames: list = None) -> str:
        """Get the annotations manifest for ``rootNode``.

        Args:
          rootNode: a schema node label (i.e. term)
          filenames: optional list of file names; when non-empty it is passed
            to the ManifestGenerator under the "Filename" key of the
            additional metadata.

        Returns: a manifest URI (assume Google doc for now)
        Raises:
           ValueError: rootNode not found in metadata model.
        """

        additionalMetadata = {}
        if filenames:
            additionalMetadata["Filename"] = filenames

        # TODO: remove reference to HTAN; have a manifestName  attribute
        mg = ManifestGenerator(self.se, rootNode, "HTAN_" + rootNode,
                               additionalMetadata)

        return mg.getManifest()

    @staticmethod
    def _parseValidationError(errorRow: int, errorText: str) -> tuple:
        """Convert the text of one validation error into an error-position tuple.

        This parsing is brittle: it depends on the textual layout of the
        validator's error report. TODO: provide a more robust error parsing.

        Args:
          errorRow: manifest row the error belongs to
          errorText: ``str()`` of the raised validation error

        Returns: ``(errorRow, errorTerms, errorValue, allowedValues)`` where
          errorTerms is the first bracketed term on the second-to-last report
          line (or ``[]`` if there is none), errorValue is the first
          single-quoted token of the first report line (or ``""`` if there is
          none) and allowedValues is the list parsed from the first bracketed
          group of the first report line (or ``[]`` if there is none).
        """
        errors = errorText.split("\n")
        headline = errors[0]

        # extract wrong value entered; not every validator message quotes one
        quotedValues = re.findall(MetadataModel._QUOTED_PATTERN, headline)
        errorValue = quotedValues[0] if quotedValues else ""

        # extract allowed values, if any, for the term that was erroneously filled in
        allowedValues = re.findall(MetadataModel._BRACKETED_PATTERN, headline)
        if allowedValues:
            allowedValues = allowedValues[0].replace("'", "").split(", ")

        # the second-to-last line of the report names the offending term;
        # a single-line report has no such line
        errorDetail = errors[-2] if len(errors) > 1 else ""

        # extract the term(s) that had erroneously filled in values, if any
        errorTerms = re.findall(MetadataModel._BRACKETED_PATTERN, errorDetail)
        if errorTerms:
            errorTerms = errorTerms[0].replace("'", "").split(", ")[0]

        return (errorRow, errorTerms, errorValue, allowedValues)

    def validateModelManifest(self, manifestPath: str, rootNode: str) -> list:
        """Check whether the provided annotations manifest satisfies all model requirements.

        Every manifest row is validated against the JSON schema generated
        for ``rootNode``; rows that fail are collected. The collected list is
        also printed to stdout before being returned.

        Args:
          rootNode: a schema node label (i.e. term)
          manifestPath: a path to the manifest csv file containing annotations

        Returns: a list with one tuple per invalid row,
          ``(row, errorTerms, errorValue, allowedValues)``; ``row`` is the
          record index + 2, i.e. the line in the csv file when the header is
          line 1. The list is empty when every row is valid.
        Raises: TODO
           ValueError: rootNode not found in metadata model.
        """

        # get validation schema for a given node in the data model
        jsonSchema = get_JSONSchema_requirements(self.se, rootNode,
                                                 rootNode + "_validation")

        # get annotations from manifest (array of json annotations corresponding to manifest rows)
        manifest = pd.read_csv(manifestPath).fillna("")
        annotations = json.loads(manifest.to_json(orient='records'))

        errorPositions = []
        for i, annotation in enumerate(annotations):
            try:
                validate(instance=annotation, schema=jsonSchema)
            except ValidationError as e:
                errorRow = i + 2  # row in the manifest where the error occurred
                errorPositions.append(
                    self._parseValidationError(errorRow, str(e)))
        print(errorPositions)
        return errorPositions

    def populateModelManifest(self, manifestPath: str, rootNode: str) -> str:
        """Populate an existing annotations manifest based on a dataframe.

        An empty manifest is generated for ``rootNode`` first and then filled
        in from the csv file at ``manifestPath``.

        Args:
          rootNode: a schema node label (i.e. term)
          manifestPath: a path to the manifest csv file containing annotations

        Returns: a link to the filled in model manifest (e.g. google sheet)

        Raises: TODO
           ValueError: rootNode not found in metadata model.
        """
        mg = ManifestGenerator(self.se, rootNode, "HTAN_" + rootNode, {})
        emptyManifestURL = mg.getManifest()

        # NOTE(review): method name spelled as exposed by ManifestGenerator
        # ("Spreasheet") -- do not "correct" it here without changing that class.
        return mg.populateManifestSpreasheet(manifestPath, emptyManifestURL)


"""
###############################################
===============================================
###############################################
"""

# Directories and requirement keys shared by the code in this module.
json_schema_output_dir = "./schemas"
schemaorg_schema_input_dir = "./data"
requires_dependency = "requiresDependency"
requires_child = "requiresChildAsValue"


if __name__ == "__main__":
    # Names of the schema.org input file and the JSON schema output file.
    schemaorg_schema_file_name = "NFSchemaReq.jsonld"
    json_schema_file_name = "nf_jsonschema.json"

    # Load the schema.org model.
    schema_input_path = os.path.join(schemaorg_schema_input_dir,
                                     schemaorg_schema_file_name)
    se = SchemaExplorer()
    se.load_schema(schema_input_path)

    g = se.get_nx_schema()

    # Derive the JSON schema for everything below "Thing".
    json_schema = get_JSONSchema_requirements(
        se,
        "Thing",
        schema_name="NFJSONschema",
    )

    # Write the JSON schema to the output directory.
    json_schema_output_path = os.path.join(json_schema_output_dir,
                                           json_schema_file_name)
    with open(json_schema_output_path, "w") as out_file:
        json.dump(json_schema, out_file, indent=3)