def metadata_conversion(old_sql, new_sql, user, user_email):
    """
    Conversion of an existing metadata file to a newer version

    The file is parsed with ``JSONParser_1_3``, a contribution entry naming
    the converting user is appended, and the result of
    ``JSONCompiler().visit`` is written to ``new_sql``.

    Parameters
    ----------
    old_sql: str
        The path to the file containing the old metadata.
    new_sql: str
        The path of the file the converted metadata is written to.
    user: str
        The name of the user for the 'contributions' section
    user_email: str
        The email address of the user.

    Returns
    -------
    None
    """
    parser = JSONParser_1_3()
    metadata = parser.parse_from_file(old_sql)

    # Parsers in this code base may leave the section as None when the input
    # had no contributors, so make sure there is a list to append to.
    if metadata.contributions is None:
        metadata.contributions = []

    metadata.contributions.append(
        structure.Contribution(
            contributor=structure.Person(name=user, email=user_email),
            # Call now(): the contribution needs a timestamp, not the function.
            date=datetime.now(),
            obj=None,
            comment="Update metadata to v1.3 using metadata conversion tool",
        )
    )

    compiler = JSONCompiler()

    # The file must be opened for writing; the default mode is read-only.
    # NOTE(review): assumes compiler.visit() returns a str - confirm.
    with open(new_sql, "w", encoding="utf-8") as out_file:
        out_file.write(compiler.visit(metadata))
def parse_contributor(self, graph: Graph, parent: Node) -> struc.Contribution:
    """Assemble a ``Contribution`` from the objects attached to ``parent``.

    Each value is looked up with ``graph.objects(parent, <term>)``:

    * ``DCTERMS.contributor`` -> exactly one node, handed to ``parse_person``
    * ``OEO.date``            -> exactly one value, handed to ``parse_date``
    * ``OEO.object``          -> reduced with ``_one_str_or_none``
    * ``OEO.comment``         -> reduced with ``_one_str_or_none``
    """
    person = self.parse_person(
        graph, _only(graph.objects(parent, DCTERMS.contributor))
    )
    contribution_date = self.parse_date(_only(graph.objects(parent, OEO.date)))
    target = _one_str_or_none(graph.objects(parent, OEO.object))
    remark = _one_str_or_none(graph.objects(parent, OEO.comment))

    return struc.Contribution(
        contributor=person,
        date=contribution_date,
        obj=target,
        comment=remark,
    )
licenses=[s.TermsOfUse(attribution="Publisher2")], ), ], terms_of_use=[ s.TermsOfUse( lic=odbl10, instruction= "You are free: To Share, To Create, To Adapt; As long as you: Attribute, Share-Alike, Keep open!", attribution="Institute", ) ], contributions=[ s.Contribution( contributor=s.Person(name="Person McHuman", email="*****@*****.**"), date=datetime.datetime(2011, 1, 11, 0, 0, 0), obj=None, comment="Prepared the dataset", ), s.Contribution( contributor=s.Person(name="Indivia Mensch", email="*****@*****.**"), date=datetime.datetime(2012, 2, 12, 0, 0, 0), obj=None, comment="Fixed Metadata String and date format ", ), ], resources=[ s.Resource( name="example.datatable", resource_format="PostgreSQL",
def parse(self, json_old, *args, **kwargs):
    """Convert an old-format metadata dictionary into an ``OEPMetadata``.

    Every section that is missing from ``json_old`` (or explicitly null) is
    represented as ``None`` in the result. ``context``, ``review``,
    ``comment`` and ``identifier`` are always ``None``.

    Parameters
    ----------
    json_old: dict
        The already decoded metadata.
    *args, **kwargs
        Accepted but not used by this implementation.

    Returns
    -------
    structure.OEPMetadata
    """
    # context section (never filled by this parser)
    context = None

    # filling the spatial section
    # A key that is present but null is treated like a missing key, in line
    # with all other sections below.
    old_spatial = json_old.get("spatial")
    if old_spatial is None:
        spatial = None
    else:
        spatial = structure.Spatial(
            extent=old_spatial.get("extent"),
            resolution=old_spatial.get("resolution"),
        )

    # filling the temporal section
    old_temporal = json_old.get("temporal")
    if old_temporal is None:
        temporal = None
    else:
        temporal = structure.Temporal(
            reference_date=parse_date_or_none(old_temporal.get("reference_date"))
        )

    # filling the source section
    # For future reference: There is an important semantic difference between
    # `sources = None` and `sources = []`. The former means that there is no
    # information regarding sources, the latter means that there are no
    # sources. This holds for all lists around here.
    old_sources = json_old.get("sources")
    if old_sources is None:
        sources = None
    else:
        sources = [
            structure.Source(
                title=old_source.get("name"),
                description=old_source.get("description"),
                path=old_source.get("url"),
                licenses=[
                    structure.TermsOfUse(attribution=old_source.get("copyright"))
                ],
            )
            for old_source in old_sources
        ]

    # filling the license section
    old_license = json_old.get("license")
    if old_license is None:
        licenses = None  # not []! (see sources)
    else:
        licenses = [
            structure.TermsOfUse(
                lic=structure.License(
                    identifier=old_license.get("id"),
                    name=old_license.get("name"),
                    path=old_license.get("url"),
                ),
                instruction=old_license.get("instruction"),
                attribution=old_license.get("copyright"),
            )
        ]

    # filling the contributors section
    old_contributors = json_old.get("contributors")
    if old_contributors is None:
        contributions = None
    else:
        contributions = [
            structure.Contribution(
                contributor=structure.Person(
                    name=old_contributor.get("name"),
                    email=old_contributor.get("email"),
                ),
                date=parse_date_or_none(old_contributor.get("date")),
                comment=old_contributor.get("comment"),
            )
            for old_contributor in old_contributors
        ]

    # filling the resources section
    old_resources = json_old.get("resources")
    if old_resources is None:
        resources = None
    else:
        resources = []
        for resource in old_resources:
            old_fields = resource.get("fields")
            if old_fields is None:
                fields = None
            else:
                fields = [
                    structure.Field(
                        name=field.get("name"),
                        description=field.get("description"),
                        unit=field.get("unit"),
                    )
                    for field in old_fields
                ]
            resources.append(
                structure.Resource(
                    name=resource.get("name"),
                    # the format is hard-coded for this metadata version
                    resource_format="PostgreSQL",
                    schema=structure.Schema(fields=fields),
                )
            )

    # neither section is filled by this parser
    review = None
    comment = None

    return structure.OEPMetadata(
        title=json_old.get("title"),
        description=json_old.get("description"),
        languages=json_old.get("language"),
        identifier=None,
        context=context,
        spatial=spatial,
        temporal=temporal,
        sources=sources,
        terms_of_use=licenses,
        contributions=contributions,
        resources=resources,
        review=review,
        comment=comment,
    )
def parse(self, json_old: dict, *args, **kwargs):
    """Build an ``OEPMetadata`` object from a decoded metadata dictionary.

    Optional sections that are missing from ``json_old`` are represented as
    ``None`` in the result; only ``"id"`` is mandatory.

    Parameters
    ----------
    json_old: dict
        The already decoded metadata.
    *args, **kwargs
        Accepted but not used by this implementation.

    Returns
    -------
    structure.OEPMetadata

    Raises
    ------
    ParserException
        If ``json_old`` has no ``"id"`` key, or a foreign key has no
        ``"reference"`` entry.
    KeyError
        If a foreign key names a field that is not among the fields of the
        same schema.
    """
    if "id" not in json_old:
        raise ParserException("metadata string does not contain an id")

    # context section
    inp_context = json_old.get("context")
    if inp_context is None:
        context = None
    else:
        funding_agency = None
        if "fundingAgency" in inp_context:
            funding_agency = structure.Agency(
                name=inp_context.get("fundingAgency"),
                logo=inp_context.get("fundingAgencyLogo"),
            )
        publisher = None
        if "publisherLogo" in inp_context:
            publisher = structure.Agency(logo=inp_context.get("publisherLogo"))
        context = structure.Context(
            homepage=inp_context.get("homepage"),
            documentation=inp_context.get("documentation"),
            source_code=inp_context.get("sourceCode"),
            contact=inp_context.get("contact"),
            grant_number=inp_context.get("grantNo"),
            funding_agency=funding_agency,
            publisher=publisher,
        )

    # filling the spatial section
    old_spatial = json_old.get("spatial")
    if old_spatial is None:
        spatial = None
    else:
        spatial = structure.Spatial(
            location=old_spatial.get("location"),
            extent=old_spatial.get("extent"),
            resolution=old_spatial.get("resolution"),
        )

    # filling the temporal section
    inp_temporal = json_old.get("temporal")
    if inp_temporal is None:
        temporal = None
    else:
        inp_timeseries = inp_temporal.get("timeseries")
        # Stays empty when there is no timeseries entry, so that Temporal
        # receives only the reference date.
        timeseries = {}
        if inp_timeseries is not None:
            alignment = inp_timeseries.get("alignment")
            timeseries = dict(
                start=parse_date_or_none(inp_timeseries.get("start")),
                end=parse_date_or_none(inp_timeseries.get("end")),
                resolution=inp_timeseries.get("resolution"),
                ts_orientation=(
                    structure.TimestampOrientation.create(alignment)
                    if alignment is not None
                    else None
                ),
                aggregation=inp_timeseries.get("aggregationType"),
            )
        temporal = structure.Temporal(
            reference_date=parse_date_or_none(inp_temporal.get("referenceDate")),
            **timeseries,
        )

    # filling the source section
    old_sources = json_old.get("sources")
    if old_sources is None:
        sources = None
    else:
        sources = [
            structure.Source(
                title=old_source.get("title"),
                description=old_source.get("description"),
                path=old_source.get("path"),
                # `or []` also covers an explicit null in the input
                licenses=[
                    self.parse_term_of_use(old_source_license)
                    for old_source_license in old_source.get("licenses") or []
                ],
            )
            for old_source in old_sources
        ]

    # filling the license section
    old_licenses = json_old.get("licenses")
    if old_licenses is None:
        licenses = None
    else:
        licenses = [
            self.parse_term_of_use(old_license) for old_license in old_licenses
        ]

    # filling the contributors section
    old_contributors = json_old.get("contributors")
    if old_contributors is None:
        contributors = None
    else:
        contributors = [
            structure.Contribution(
                contributor=structure.Person(
                    name=old_contributor.get("title"),
                    email=old_contributor.get("email"),
                ),
                date=parse_date_or_none(old_contributor.get("date")),
                obj=old_contributor.get("object"),
                comment=old_contributor.get("comment"),
            )
            for old_contributor in old_contributors
        ]

    # filling the resources section
    old_resources = json_old.get("resources")
    if old_resources is None:
        resources = None
    else:
        resources = []
        for resource in old_resources:
            old_schema = resource.get("schema")
            if old_schema is None:
                schema = None
            else:
                old_fields = old_schema.get("fields")
                if old_fields is None:
                    fields = None
                else:
                    fields = [
                        structure.Field(
                            name=field.get("name"),
                            description=field.get("description"),
                            field_type=field.get("type"),
                            unit=field.get("unit"),
                        )
                        for field in old_fields
                    ]
                # lookup table used to resolve the source side of foreign keys
                field_dict = {field.name: field for field in fields or []}

                foreign_keys = []
                for fk in old_schema.get("foreignKeys") or []:
                    old_reference = fk.get("reference")
                    if old_reference is None:
                        raise ParserException("Foreign key without reference:", fk)
                    source_fields = [
                        field_dict[field_name]
                        for field_name in fk.get("fields") or []
                    ]
                    old_referenced_fields = old_reference.get("fields")
                    if old_referenced_fields is None:
                        referenced_fields = None
                    else:
                        referenced_fields = [
                            structure.Field(name=fk_field)
                            for fk_field in old_referenced_fields
                        ]
                    referenced_resource = structure.Resource(
                        name=old_reference.get("resource"),
                        schema=structure.Schema(fields=referenced_fields),
                    )
                    # A reference without a field list yields no field pairs
                    # instead of failing on iteration over None.
                    for rf in referenced_fields or []:
                        rf.resource = referenced_resource
                    references = [
                        structure.Reference(s, t)
                        for s, t in zip(source_fields, referenced_fields or [])
                    ]
                    foreign_keys.append(structure.ForeignKey(references=references))

                schema = structure.Schema(
                    fields=fields,
                    primary_key=old_schema.get("primaryKey"),
                    foreign_keys=foreign_keys,
                )

            old_dialect = resource.get("dialect")
            if old_dialect is None:
                dialect = None
            else:
                dialect = structure.Dialect(
                    delimiter=old_dialect.get("delimiter"),
                    decimal_separator=old_dialect.get("decimalSeparator"),
                )

            resources.append(
                structure.Resource(
                    profile=resource.get("profile"),
                    name=resource.get("name"),
                    path=resource.get("path"),
                    resource_format=resource.get("format"),
                    encoding=resource.get("encoding"),
                    schema=schema,
                    dialect=dialect,
                )
            )

    # review section
    inp_review = json_old.get("review")
    if inp_review is None:
        review = None
    else:
        review = structure.Review(
            path=inp_review.get("path"), badge=inp_review.get("badge")
        )

    # comment section
    inp_comment = json_old.get("_comment")
    if inp_comment is None:
        comment = None
    else:
        comment = structure.MetaComment(
            metadata_info=inp_comment.get("metadata"),
            dates=inp_comment.get("dates"),
            units=inp_comment.get("units"),
            languages=inp_comment.get("languages"),
            licenses=inp_comment.get("licenses"),
            review=inp_comment.get("review"),
            # the input key is "null" although the attribute is called `none`
            none=inp_comment.get("null"),
        )

    return structure.OEPMetadata(
        name=json_old.get("name"),
        title=json_old.get("title"),
        identifier=json_old["id"],
        description=json_old.get("description"),
        languages=json_old.get("language"),
        keywords=json_old.get("keywords"),
        publication_date=parse_date_or_none(json_old.get("publicationDate")),
        context=context,
        spatial=spatial,
        temporal=temporal,
        sources=sources,
        terms_of_use=licenses,
        contributions=contributors,
        resources=resources,
        review=review,
        comment=comment,
    )
def parse(self, json_old, *args, **kwargs):
    """Translate an old-format metadata dictionary into an ``OEPMetadata``.

    Values the old format does not carry are filled with ``None`` (or empty
    lists); the explanatory ``MetaComment`` texts are fixed strings. The
    ``spatial``, ``temporal``, ``sources``, ``license``, ``contributors`` and
    ``resources`` entries are read without a fallback.
    """

    def build_resource(entry):
        # one old resource entry -> Resource with a fields-only Schema
        columns = []
        for column in entry.get("fields", []):
            columns.append(
                structure.Field(
                    name=column.get("name"),
                    description=column.get("description"),
                    field_type=None,
                    unit=column.get("unit"),
                )
            )
        table_schema = structure.Schema(
            fields=columns, primary_key=None, foreign_keys=[]
        )
        return structure.Resource(
            profile=None,
            name=entry.get("name"),
            path=None,
            resource_format="PostgreSQL",
            encoding=None,
            dialect=None,
            schema=table_schema,
        )

    # context: the old format has nothing to offer here
    context_section = structure.Context(
        homepage=None,
        documentation=None,
        source_code=None,
        contact=None,
        grant_number=None,
    )

    # spatial
    spatial_in = json_old.get("spatial")
    spatial_section = structure.Spatial(
        location=None,
        extent=spatial_in.get("extent"),
        resolution=spatial_in.get("resolution"),
    )

    # temporal
    temporal_section = structure.Temporal(
        reference_date=parse_date(json_old["temporal"].get("reference_date")),
        start=None,
        end=None,
        resolution=None,
        ts_orientation=None,
    )

    # sources
    source_list = []
    for src in json_old.get("sources"):
        source_list.append(
            structure.Source(
                title=src.get("name"),
                description=src.get("description"),
                path=src.get("url"),
                source_license=None,
                source_copyright=src.get("copyright"),
            )
        )

    # license: a single entry in the old format becomes a one-element list
    license_in = json_old.get("license")
    license_obj = structure.License(
        identifier=license_in.get("id"),
        name=license_in.get("name"),
        path=license_in.get("url"),
        other_references=[],
        text=None,
    )
    terms = [
        structure.TermsOfUse(
            lic=license_obj,
            instruction=license_in.get("instruction"),
            attribution=license_in.get("copyright"),
        )
    ]

    # contributors
    contribution_list = []
    for person in json_old.get("contributors"):
        contribution_list.append(
            structure.Contribution(
                contributor=structure.Person(
                    name=person.get("name"), email=person.get("email")
                ),
                date=parse_date(person.get("date")),
                obj=None,
                comment=person.get("comment"),
            )
        )

    # resources
    resource_list = [build_resource(entry) for entry in json_old.get("resources")]

    review_section = structure.Review(path=None, badge=None)

    comment_section = structure.MetaComment(
        metadata_info="Metadata documentation and explanation (https://github.com/OpenEnergyPlatform/organisation/wiki/metadata)",
        dates="Dates and time must follow the ISO8601 including time zone (YYYY-MM-DD or YYYY-MM-DDThh:mm:ss±hh)",
        units="Use a space between numbers and units (100 m)",
        languages="Languages must follow the IETF (BCP47) format (en-GB, en-US, de-DE)",
        licenses="License name must follow the SPDX License List (https://spdx.org/licenses/",
        review="Following the OEP Data Review (https://github.com/OpenEnergyPlatform/data-preprocessing/wiki)",
        none="If not applicable use (none)",
    )

    return structure.OEPMetadata(
        name=None,
        title=json_old.get("title"),
        identifier=None,
        description=json_old.get("description"),
        languages=json_old.get("language"),
        keywords=[],
        publication_date=None,
        context=context_section,
        spatial=spatial_section,
        temporal=temporal_section,
        sources=source_list,
        terms_of_use=terms,
        contributions=contribution_list,
        resources=resource_list,
        review=review_section,
        comment=comment_section,
    )
def parse(self, json_old, *args, **kwargs):
    """Build an ``OEPMetadata`` object from a decoded metadata dictionary.

    Parameters
    ----------
    json_old: dict
        The already decoded metadata.
    *args, **kwargs
        Accepted but not used; present so the signature matches the other
        ``parse`` implementations.

    Returns
    -------
    structure.OEPMetadata

    Raises
    ------
    KeyError
        If ``json_old`` lacks ``"temporal"``, ``"review"`` or ``"_comment"``,
        if a resource lacks ``"schema"`` or ``"dialect"``, if a foreign key
        lacks ``"reference"``, or if a foreign key names a field that is not
        among the fields of the same schema.
    """
    # context section
    inp_context = json_old.get("context")
    context = structure.Context(
        homepage=inp_context.get("homepage"),
        documentation=inp_context.get("documentation"),
        source_code=inp_context.get("sourceCode"),
        contact=inp_context.get("contact"),
        grant_number=inp_context.get("grantNo"),
    )

    # filling the spatial section
    old_spatial = json_old.get("spatial")
    spatial = structure.Spatial(
        location=old_spatial.get("location"),
        extent=old_spatial.get("extent"),
        resolution=old_spatial.get("resolution"),
    )

    # filling the temporal section
    inp_temporal = json_old["temporal"]
    temporal = structure.Temporal(
        reference_date=parse_date(inp_temporal.get("referenceDate")),
        start=parse_date(inp_temporal.get("start")),
        end=parse_date(inp_temporal.get("end")),
        resolution=inp_temporal.get("resolution"),
        ts_orientation=structure.TimestampOrientation.create(
            inp_temporal.get("timestamp")
        ),
    )

    # filling the source section
    sources = [
        structure.Source(
            title=old_source.get("title"),
            description=old_source.get("description"),
            path=old_source.get("path"),
            # only the license identifier is available for a source
            source_license=structure.License(
                name=None,
                identifier=old_source.get("license"),
                other_references=[],
                path=None,
                text=None,
            ),
            source_copyright=old_source.get("copyright"),
        )
        for old_source in json_old.get("sources", [])
    ]

    # filling the license section
    # The input's "name" is used as identifier and its "title" as name.
    licenses = [
        structure.TermsOfUse(
            lic=structure.License(
                identifier=old_license.get("name"),
                name=old_license.get("title"),
                path=old_license.get("path"),
                other_references=[],
                text=None,
            ),
            instruction=old_license.get("instruction"),
            attribution=old_license.get("attribution"),
        )
        for old_license in json_old.get("licenses", [])
    ]

    # filling the contributors section
    contributors = [
        structure.Contribution(
            contributor=structure.Person(
                name=old_contributor.get("title"),
                email=old_contributor.get("email"),
            ),
            date=parse_date(old_contributor.get("date")),
            obj=old_contributor.get("object"),
            comment=old_contributor.get("comment"),
        )
        for old_contributor in json_old.get("contributors", [])
    ]

    # filling the resources section
    resources = []
    for resource in json_old.get("resources", []):
        old_schema = resource["schema"]
        fields = [
            structure.Field(
                name=field.get("name"),
                description=field.get("description"),
                field_type=field.get("type"),
                unit=field.get("unit"),
            )
            for field in old_schema.get("fields", [])
        ]
        # lookup table used to resolve the source side of foreign keys
        field_dict = {field.name: field for field in fields}

        foreign_keys = []
        # A schema without foreign keys is treated like one with an empty
        # list, the same way missing fields are handled above.
        for fk in old_schema.get("foreignKeys", []):
            old_reference = fk["reference"]
            source_fields = [
                field_dict[field_name] for field_name in fk.get("fields", [])
            ]
            referenced_fields = [
                structure.Field(
                    name=fk_field, unit=None, field_type=None, description=None
                )
                for fk_field in old_reference.get("fields")
            ]
            # NOTE(review): the result is not used further in this function;
            # the construction is kept in case Resource/Schema link the
            # fields to the resource on creation - confirm before removing.
            referenced_resource = structure.Resource(
                name=old_reference.get("resource"),
                schema=structure.Schema(
                    fields=referenced_fields, foreign_keys=None, primary_key=None
                ),
                dialect=None,
                encoding=None,
                path=None,
                profile=None,
                resource_format=None,
            )
            references = [
                structure.Reference(s, t)
                for s, t in zip(source_fields, referenced_fields)
            ]
            foreign_keys.append(structure.ForeignKey(references=references))

        schema = structure.Schema(
            fields=fields,
            primary_key=old_schema.get("primaryKey"),
            foreign_keys=foreign_keys,
        )

        old_dialect = resource["dialect"]
        dialect = structure.Dialect(
            delimiter=old_dialect.get("delimiter"),
            decimal_separator=old_dialect.get("decimalSeparator"),
        )

        resources.append(
            structure.Resource(
                profile=resource.get("profile"),
                name=resource.get("name"),
                path=resource.get("path"),
                resource_format=resource.get("format"),
                encoding=resource.get("encoding"),
                schema=schema,
                dialect=dialect,
            )
        )

    # review section
    inp_review = json_old["review"]
    review = structure.Review(
        path=inp_review.get("path"), badge=inp_review.get("badge")
    )

    # comment section
    inp_comment = json_old["_comment"]
    comment = structure.MetaComment(
        metadata_info=inp_comment.get("metadata"),
        dates=inp_comment.get("dates"),
        units=inp_comment.get("units"),
        languages=inp_comment.get("languages"),
        licenses=inp_comment.get("licenses"),
        review=inp_comment.get("review"),
        none=inp_comment.get("none"),
    )

    return structure.OEPMetadata(
        name=json_old.get("name"),
        title=json_old.get("title"),
        identifier=json_old.get("id"),
        description=json_old.get("description"),
        languages=json_old.get("language"),
        keywords=json_old.get("keywords"),
        publication_date=parse_date(json_old.get("publicationDate")),
        context=context,
        spatial=spatial,
        temporal=temporal,
        sources=sources,
        terms_of_use=licenses,
        contributions=contributors,
        resources=resources,
        review=review,
        comment=comment,
    )