def create_resource(path): """ """ from datapackage import Resource resource = Resource({'path': path}) resource.infer() resource.descriptor['schema']['primaryKey'] = 'name' resource.descriptor[ 'description'] = 'Contains the hubs (nodes) for the energy system representation' resource.descriptor[ 'title'] = 'Energy system hubs for DE and its electrical neighbours' resource.descriptor['sources'] = [{ 'title': 'NUTS Shapefiles', 'path': 'http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/NUTS_2013_10M_SH.zip', 'files': [ 'NUTS_2013_10M_SH/data/NUTS_RG_10M_2013.shp', 'NUTS_2013_10M_SH/data/NUTS_RG_10M_2013.dbf' ] }] resource.commit() resource.descriptor if resource.valid: resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor['description'] = (
        'Installed transmission capacities from the e-highway 2050 scenario')
    resource.descriptor['title'] = 'Installed transmission capacities'
    resource.descriptor['sources'] = [{
        'title': 'E-Highway 2050 transmission capacities',
        'path': 'http://www.e-highway2050.eu/fileadmin/documents/' +
                'Results/e-Highway_database_per_country-08022016.xlsx'
    }]
    resource.descriptor['schema']['foreignKeys'] = [{
        "fields": "from_bus",
        "reference": {"resource": "bus", "fields": "name"}
    }, {
        "fields": "to_bus",
        "reference": {"resource": "bus", "fields": "name"}
    }]
    resource.commit()
    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
def create_resource(path): """ """ from datapackage import Resource resource = Resource({'path': path}) resource.infer() resource.descriptor['schema']['primaryKey'] = 'name' resource.descriptor[ 'description'] = 'Excess slacks for each electricity hub in the energy system representation' resource.descriptor[ 'title'] = 'Excess slacks for DE and its electrical neighbours' resource.descriptor['schema']['foreignKeys'] = [{ "fields": "bus", "reference": { "resource": "bus", "fields": "name" } }] resource.commit() resource.descriptor if resource.valid: resource.save('resources/' + resource.name + '.json')
def create_resource(path): """ """ mapper = {} from datapackage import Resource resource = Resource({'path': path}) resource.infer() resource.descriptor['schema']['primaryKey'] = 'name' resource.descriptor[ 'description'] = 'Installed capacities, costs and technical parameters for components' resource.descriptor['title'] = '{} components'.format( resource.name.title()) resource.descriptor['sources'] = [{ 'title': 'E-Highway 2050 installed capacities', 'path': 'http://www.e-highway2050.eu/fileadmin/documents/Results/e-Highway2050_2050_Country_and_cluster_installed_capacities_31-03-2015.xlsx' }] resource.descriptor['schema']['foreignKeys'] = [{ "fields": "bus", "reference": { "resource": "bus", "fields": "name" } }] if 'demand' in resource.name: resource.descriptor['schema']['foreignKeys'].append({ "fields": "profile", "reference": { "resource": "demand-profiles" } }) elif 'volatile-generator' in resource.name: resource.descriptor['schema']['foreignKeys'].append({ "fields": "profile", "reference": { "resource": "generator-profiles" } }) resource.commit() if resource.valid: resource.save('resources/' + resource.name + '.json') else: print('Resource is not valid, writing resource anyway...') resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'timeindex'
    resource.descriptor['description'] = (
        'Profiles for Run of River (ROR) components. The profile is assumed' +
        ' to be constant during the year.')
    resource.descriptor['title'] = 'ROR profiles'
    resource.descriptor['sources'] = [{'title': 'Assumption'}]
    resource.commit()
    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'timeindex'
    resource.descriptor['description'] = (
        'PV profiles (capacity factors) from renewables ninja for each country')
    resource.descriptor['title'] = 'PV profiles'
    resource.descriptor['sources'] = [{
        'title': 'Renewables Ninja PV Capacity Factors',
        'path': 'https://www.renewables.ninja/static/downloads/ninja_europe_pv_v1.1.zip'
    }]
    resource.commit()
    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'timeindex'
    resource.descriptor['description'] = 'Demand profiles per country'
    resource.descriptor['title'] = 'Demand profiles'
    resource.descriptor['sources'] = [{
        'title': 'OPSD timeseries',
        'path': 'https://data.open-power-system-data.org/time_series/2017-07-09/' +
                'time_series_60min_singleindex.csv'
    }]
    resource.commit()
    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
def create_resource(path, title):
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor['description'] = (
        'Installed capacities, costs and technical parameters for components')
    resource.descriptor['title'] = title
    resource.descriptor['sources'] = [{
        'title': 'Restore 2050 hydro inflow timeseries',
        'path': 'https://zenodo.org/record/804244/files/Hydro_Inflow.zip'
    }, {
        'title': 'E-Highway 2050 installed capacities',
        'path': 'http://www.e-highway2050.eu/fileadmin/documents/Results/e-Highway2050_2050_Country_and_cluster_installed_capacities_31-03-2015.xlsx'
    }, {
        'title': 'DIW Berlin - Current and Prospective Costs of Electricity Generation until 2050',
        'path': 'https://www.diw.de/documents/publikationen/73/diw_01.c.424566.de/diw_datadoc_2013-068.pdf'
    }]
    resource.descriptor['schema']['foreignKeys'] = [{
        "fields": "bus",
        "reference": {"resource": "bus", "fields": "name"}
    }]
    resource.commit()
    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
import os
import shutil

from datapackage import Package, Resource


def infer_metadata(
        package_name="default-name",
        keep_resources=False,
        foreign_keys={
            "bus": [
                "volatile",
                "dispatchable",
                "storage",
                "load",
                "reservoir",
                "shortage",
                "excess",
            ],
            "profile": ["load", "volatile", "ror"],
            "from_to_bus": ["connection", "line", "conversion"],
            "chp": ["backpressure", "extraction", "chp"],
        },
        path=None,
):
    """Add basic meta data for a datapackage.

    Parameters
    ----------
    package_name: string
        Name of the data package
    keep_resources: boolean
        Flag indicating whether the resource meta data json-files should be
        kept after the main datapackage.json is created. The resource meta
        data will be stored in the `resources` directory.
    foreign_keys: dict
        Dictionary with foreign key specifications. Keys of the dictionary
        are: 'bus', 'profile', 'from_to_bus', 'chp'. Values are lists of
        resource names.
    path: string
        Absolute path to the root-folder of the datapackage
    """
    current_path = os.getcwd()
    if path:
        print("Setting current work directory to {}".format(path))
        os.chdir(path)

    p = Package()
    p.descriptor["name"] = package_name
    p.descriptor["profile"] = "tabular-data-package"
    p.commit()

    if not os.path.exists("resources"):
        os.makedirs("resources")

    # create meta data resources for elements
    if not os.path.exists("data/elements"):
        print("No data path found in directory {}. Skipping...".format(
            os.getcwd()))
    else:
        for f in os.listdir("data/elements"):
            r = Resource({"path": os.path.join("data/elements", f)})
            r.infer()
            r.descriptor["schema"]["primaryKey"] = "name"

            if r.name in foreign_keys.get("bus", []):
                r.descriptor["schema"]["foreignKeys"] = [{
                    "fields": "bus",
                    "reference": {"resource": "bus", "fields": "name"},
                }]

                if r.name in foreign_keys.get("profile", []):
                    r.descriptor["schema"]["foreignKeys"].append({
                        "fields": "profile",
                        "reference": {"resource": r.name + "_profile"},
                    })

            elif r.name in foreign_keys.get("from_to_bus", []):
                r.descriptor["schema"]["foreignKeys"] = [
                    {
                        "fields": "from_bus",
                        "reference": {"resource": "bus", "fields": "name"},
                    },
                    {
                        "fields": "to_bus",
                        "reference": {"resource": "bus", "fields": "name"},
                    },
                ]

            elif r.name in foreign_keys.get("chp", []):
                r.descriptor["schema"]["foreignKeys"] = [
                    {
                        "fields": "fuel_bus",
                        "reference": {"resource": "bus", "fields": "name"},
                    },
                    {
                        "fields": "electricity_bus",
                        "reference": {"resource": "bus", "fields": "name"},
                    },
                    {
                        "fields": "heat_bus",
                        "reference": {"resource": "bus", "fields": "name"},
                    },
                ]

            r.commit()
            r.save(os.path.join("resources", f.replace(".csv", ".json")))
            p.add_resource(r.descriptor)

    # create meta data resources for sequences
    if not os.path.exists("data/sequences"):
        print("No data path found in directory {}. Skipping...".format(
            os.getcwd()))
    else:
        for f in os.listdir("data/sequences"):
            r = Resource({"path": os.path.join("data/sequences", f)})
            r.infer()
            r.commit()
            r.save(os.path.join("resources", f.replace(".csv", ".json")))
            p.add_resource(r.descriptor)

    p.commit()
    p.save("datapackage.json")

    if not keep_resources:
        shutil.rmtree("resources")

    os.chdir(current_path)
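# Usage sketch for infer_metadata(): the package root is assumed to contain
# 'data/elements' and 'data/sequences' folders with CSV files; the package
# name and absolute path below are hypothetical.
infer_metadata(
    package_name='my-energy-system',
    keep_resources=False,
    path='/home/user/my-datapackage',
)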
from datapackage import Package, Resource

# mapping from field name to field description
description_mapper = {
    'river': 'river in which the plant is located',
    'river_km': 'km from stream source',
    'level_meter': 'assigned level meter for flow curve'
}

# create resource
r = Resource({'path': 'data/runofriver.csv'})

# get basic metadata from data
r.infer()

# add description for fields based on mapper
for i in range(len(r.descriptor['schema']['fields'])):
    r.descriptor['schema']['fields'][i]['description'] = \
        description_mapper[r.descriptor['schema']['fields'][i]['name']]

# commit (apply) changes to resource
r.commit()

# save the resource
r.save('dataresource.json')

# create a package
p = Package()

# add the resource descriptor
p.add_resource(r.descriptor)

# save the package
p.save('datapackage.json')
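# Sketch of reading the saved package back, assuming the script above has run
# and 'datapackage.json' lies next to the 'data' folder. The resource name
# 'runofriver' is what datapackage infers from the CSV file name.
from datapackage import Package

package = Package('datapackage.json')
runofriver = package.get_resource('runofriver')
print([field['description'] for field in
       runofriver.descriptor['schema']['fields']])
print(runofriver.read(keyed=True)[:3])  # first three rows as dicts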
from datapackage import Resource

resource = Resource({'path': 'data.csv'})

# Infer
resource.infer()
print(resource.descriptor)
#{ path: 'data.csv',
#  profile: 'tabular-data-resource',
#  encoding: 'utf-8',
#  name: 'data',
#  format: 'csv',
#  mediatype: 'text/csv',
#  schema: { fields: [ [Object], [Object] ], missingValues: [ '' ] } }
# resource.read(keyed=True)
# Fails with a data validation error

# Tweak
resource.descriptor['schema']['missingValues'] = 'N/A'
resource.commit()
print(resource.valid)  # False
print(resource.errors)
# Error: Descriptor validation error:
#   Invalid type: string (expected array)
#     at "/missingValues" in descriptor and
#     at "/properties/missingValues/type" in profile

# Tweak-2
resource.descriptor['schema']['missingValues'] = ['', 'N/A']
resource.commit()
print(resource.valid)  # true

# Read
print(resource.read(keyed=True))
# [
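# Follow-up sketch: persist the fixed descriptor and stream the rows lazily
# instead of reading them all at once ('dataresource.json' is an assumed
# file name).
resource.save('dataresource.json')
for row in resource.iter(keyed=True):
    print(row)  # values listed in 'missingValues' are returned as None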
class load(DataStreamProcessor):

    def __init__(self, load_source, name=None, resources=None,
                 validate=False, strip=True, **options):
        super(load, self).__init__()
        self.load_source = load_source
        self.options = options
        self.name = name
        self.resources = resources
        self.load_dp = None
        self.validate = validate
        self.strip = strip
        self.force_strings = options.get('force_strings') is True

    def process_datapackage(self, dp: Package):
        if isinstance(self.load_source, tuple):
            datapackage_descriptor, _ = self.load_source
            dp.descriptor.setdefault('resources', [])
            self.resource_matcher = ResourceMatcher(self.resources,
                                                    datapackage_descriptor)
            for resource_descriptor in datapackage_descriptor['resources']:
                if self.resource_matcher.match(resource_descriptor['name']):
                    dp.add_resource(resource_descriptor)
        else:  # load_source is string:
            if self.load_source.startswith('env://'):
                env_var = self.load_source[6:]
                self.load_source = os.environ.get(env_var)
                if self.load_source is None:
                    raise ValueError(
                        f"Couldn't find value for env var '{env_var}'")
            if os.path.basename(self.load_source) == 'datapackage.json':
                self.load_dp = Package(self.load_source)
                self.resource_matcher = ResourceMatcher(self.resources,
                                                        self.load_dp)
                dp.descriptor.setdefault('resources', [])
                for resource in self.load_dp.resources:
                    if self.resource_matcher.match(resource.name):
                        dp.add_resource(resource.descriptor)
            else:
                if os.path.exists(self.load_source):
                    base_path = os.path.dirname(self.load_source) or '.'
                    self.load_source = os.path.basename(self.load_source)
                else:
                    base_path = None
                descriptor = dict(path=self.load_source,
                                  profile='tabular-data-resource')
                descriptor['format'] = self.options.get('format')
                if 'encoding' in self.options:
                    descriptor['encoding'] = self.options['encoding']
                if descriptor['format'] == 'xml' or \
                        self.load_source.endswith('.xml'):
                    self.options.setdefault('custom_parsers',
                                            {})['xml'] = XMLParser
                self.options.setdefault('ignore_blank_headers', True)
                self.options.setdefault('headers', 1)
                self.res = Resource(descriptor,
                                    base_path=base_path,
                                    **self.options)
                self.res.infer(confidence=1, limit=1000)
                if self.name is not None:
                    self.res.descriptor['name'] = self.name
                if self.force_strings:
                    for f in self.res.descriptor['schema']['fields']:
                        f['type'] = 'string'
                self.res.commit()
                self.res.descriptor['path'] = '{name}.{format}'.format(
                    **self.res.descriptor)
                dp.add_resource(self.res.descriptor)
        return dp

    def stripper(self, iterator):
        for r in iterator:
            yield dict((k, v.strip()) if isinstance(v, str) else (k, v)
                       for k, v in r.items())

    def process_resources(self, resources):
        yield from super(load, self).process_resources(resources)
        if isinstance(self.load_source, tuple):
            datapackage_descriptor, resources = self.load_source
            yield from (resource
                        for resource, descriptor
                        in zip(resources, datapackage_descriptor['resources'])
                        if self.resource_matcher.match(descriptor['name']))
        elif self.load_dp is not None:
            yield from (resource.iter(keyed=True)
                        for resource in self.load_dp.resources
                        if self.resource_matcher.match(resource.name))
        else:
            it = self.res.iter(keyed=True, cast=False)
            if self.validate:
                it = schema_validator(self.res, it)
            if self.strip:
                it = self.stripper(it)
            yield it
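# Usage sketch of the load processor above inside a dataflows pipeline;
# 'data.csv' and the output directory are assumptions, while Flow and
# dump_to_path come from the dataflows package.
from dataflows import Flow, dump_to_path

Flow(
    load('data.csv', name='my-data', strip=True),
    dump_to_path('output'),
).process()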