def create_resource(path):
    """Create and save a tabular data resource for component elements.

    Infers the schema from the CSV file at *path*, adds meta data
    (primary key, description, title, sources) plus foreign keys, and
    writes the resource descriptor to ``resources/<name>.json``.

    Parameters
    ----------
    path: string
        Path to the CSV file the resource describes.
    """
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor['description'] = (
        'Installed capacities, costs and technical parameters for components')
    resource.descriptor['title'] = '{} components'.format(
        resource.name.title())
    resource.descriptor['sources'] = [{
        'title': 'E-Highway 2050 installed capacities',
        'path': 'http://www.e-highway2050.eu/fileadmin/documents/Results/e-Highway2050_2050_Country_and_cluster_installed_capacities_31-03-2015.xlsx'
    }]
    resource.descriptor['schema']['foreignKeys'] = [{
        "fields": "bus",
        "reference": {
            "resource": "bus",
            "fields": "name"
        }
    }]
    # Demand and volatile-generator elements additionally reference a
    # profile resource via a second foreign key.
    if 'demand' in resource.name:
        resource.descriptor['schema']['foreignKeys'].append({
            "fields": "profile",
            "reference": {
                "resource": "demand-profiles"
            }
        })
    elif 'volatile-generator' in resource.name:
        resource.descriptor['schema']['foreignKeys'].append({
            "fields": "profile",
            "reference": {
                "resource": "generator-profiles"
            }
        })
    resource.commit()
    # The descriptor is written even when invalid (as in the original),
    # but the user is warned; the duplicated save call is folded into one.
    if not resource.valid:
        print('Resource is not valid, writing resource anyway...')
    resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    """Build and persist the meta data resource for ROR profiles.

    The schema is inferred from the file at *path*; description, title
    and source information are added before saving the descriptor to
    ``resources/<name>.json``.
    """
    from datapackage import Resource

    res = Resource({'path': path})
    res.infer()

    meta = res.descriptor
    meta['schema']['primaryKey'] = 'timeindex'
    meta['description'] = (
        'Profiles for Run of River (ROR) components. The profile is assumed'
        ' to be constant during the year.')
    meta['title'] = 'ROR profiles'
    meta['sources'] = [{'title': 'Assumption'}]

    res.commit()
    if res.valid:
        res.save('resources/' + res.name + '.json')
def create_resource(path):
    """Build and persist the meta data resource for PV profiles.

    Infers the schema from *path*, attaches descriptive meta data and
    the Renewables Ninja source, then saves the descriptor under
    ``resources/<name>.json``.
    """
    from datapackage import Resource

    res = Resource({'path': path})
    res.infer()

    meta = res.descriptor
    meta['schema']['primaryKey'] = 'timeindex'
    meta['description'] = (
        'PV profiles (capacity factors) from renewables ninja for each country')
    meta['title'] = 'PV profiles'
    meta['sources'] = [{
        'title': 'Renewables Ninja PV Capacity Factors',
        'path': 'https://www.renewables.ninja/static/downloads/ninja_europe_pv_v1.1.zip'
    }]

    res.commit()
    if res.valid:
        res.save('resources/' + res.name + '.json')
def infer_resources(directory="data/elements"):
    """Infer and store a datapackage.Resource for every file in *directory*.

    One JSON descriptor per data file is written to the ``resources``
    directory, which is created if missing.

    Parameters
    ----------
    directory: string
        Path to directory from where resources are inferred
    """
    if not os.path.exists("resources"):
        os.makedirs("resources")

    # One inferred meta data resource per data file.
    for filename in os.listdir(directory):
        resource = Resource({"path": os.path.join(directory, filename)})
        resource.infer()
        target = os.path.join("resources", filename.replace(".csv", ".json"))
        resource.save(target)
def create_resource(path):
    """Build and persist the meta data resource for demand profiles.

    Infers the schema from *path*, adds descriptive meta data and the
    OPSD source reference, then saves the descriptor to
    ``resources/<name>.json``.
    """
    from datapackage import Resource

    res = Resource({'path': path})
    res.infer()

    meta = res.descriptor
    meta['schema']['primaryKey'] = 'timeindex'
    meta['description'] = 'Demand profiles per country'
    meta['title'] = 'Demand profiles'
    meta['sources'] = [{
        'title': 'OPSD timeseries',
        'path': ('https://data.open-power-system-data.org/time_series/'
                 '2017-07-09/time_series_60min_singleindex.csv')
    }]

    res.commit()
    if res.valid:
        res.save('resources/' + res.name + '.json')
def save_datasets_as_data_packages(self, folder_path, identifier_field):
    """Save each dataset from a data.json source as a _datapackage_.

    For every dataset in ``self.datasets`` an inline resource is
    inferred, validated, saved as JSON, and wrapped in a zipped package.
    """
    for dataset in self.datasets:
        pkg = Package()
        # TODO check this, I'm learning datapackages
        res = Resource({'data': dataset})
        res.infer()  # adds "name": "inline"

        idf = slugify(dataset[identifier_field])
        res_path = os.path.join(folder_path, f'resource_data_json_{idf}.json')
        if not res.valid:
            raise Exception('Invalid resource')
        res.save(res_path)

        pkg.add_resource(descriptor=res.descriptor)
        pkg_path = os.path.join(folder_path, f'pkg_data_json_{idf}.zip')
        pkg.save(target=pkg_path)
def create_resource(path):
    """Create and save the meta data resource for shortage slack components.

    Infers the schema from the CSV file at *path*, sets the primary key
    and the foreign key to the bus resource, and writes the descriptor
    to ``resources/<name>.json`` when it is valid.

    Parameters
    ----------
    path: string
        Path to the CSV file the resource describes.
    """
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor['description'] = (
        'Shortage slacks for each electricity hub in the energy system '
        'representation')
    resource.descriptor['title'] = (
        'Shortage slacks for DE and its electrical neighbours')
    resource.descriptor['schema']['foreignKeys'] = [{
        "fields": "bus",
        "reference": {
            "resource": "bus",
            "fields": "name"}}]
    resource.commit()
    # (removed a bare no-op `resource.descriptor` expression statement)
    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
def create_resource(path, title):
    """Build and persist a meta data resource for hydro components.

    Infers the schema from *path*, attaches description, the given
    *title*, source references and the foreign key to the bus resource,
    then saves the descriptor to ``resources/<name>.json``.
    """
    from datapackage import Resource

    res = Resource({'path': path})
    res.infer()

    meta = res.descriptor
    meta['schema']['primaryKey'] = 'name'
    meta['description'] = (
        'Installed capacities, costs and technical parameters for components')
    meta['title'] = title
    meta['sources'] = [
        {'title': 'Restore 2050 hydro inflow timeseries',
         'path': 'https://zenodo.org/record/804244/files/Hydro_Inflow.zip'},
        {'title': 'E-Highway 2050 installed capacities',
         'path': 'http://www.e-highway2050.eu/fileadmin/documents/Results/e-Highway2050_2050_Country_and_cluster_installed_capacities_31-03-2015.xlsx'},
        {'title': 'DIW Berlin - Current and Prospective Costs of Electricity Generation until 2050',
         'path': 'https://www.diw.de/documents/publikationen/73/diw_01.c.424566.de/diw_datadoc_2013-068.pdf'},
    ]
    meta['schema']['foreignKeys'] = [
        {"fields": "bus",
         "reference": {"resource": "bus", "fields": "name"}},
    ]

    res.commit()
    if res.valid:
        res.save('resources/' + res.name + '.json')
def save_datasets_as_data_packages(self, folder_path):
    """Save each dataset source as a _datapackage_.

    Every dataset in ``self.package_list`` becomes an inline resource;
    its id is base64-encoded to build file names for the JSON descriptor
    and the zipped package.
    """
    for dataset in self.package_list:
        pkg = Package()
        # TODO check this, I'm learning datapackages
        res = Resource({'data': dataset})
        res.infer()

        # base64-encode the dataset id so it is safe to use in a filename
        raw_id = dataset['id'].encode('utf-8')
        encoded_identifier = base64.b64encode(raw_id).decode('utf-8')

        res_path = os.path.join(
            folder_path, f'resource_ckan_api_{encoded_identifier}.json')
        if not res.valid:
            raise Exception('Invalid resource')
        res.save(res_path)

        pkg.add_resource(descriptor=res.descriptor)
        pkg_path = os.path.join(
            folder_path, f'pkg_ckan_api_{encoded_identifier}.zip')
        pkg.save(target=pkg_path)
def infer_metadata(
    package_name="default-name",
    keep_resources=False,
    foreign_keys=None,
    path=None,
):
    """
    Add basic meta data for a datapackage

    Parameters
    ----------
    package_name: string
        Name of the data package
    keep_resources: boolean
        Flag indicating of the resources meta data json-files should be kept
        after main datapackage.json is created. The resource meta data will
        be stored in the `resources` directory.
    foreign_keys: dict
        Dictionary with foreign key specification. Keys for dictionary are:
        'bus', 'profile', 'from_to_bus', 'chp'. Values are lists with
        strings with the name of the resources. Defaults to the standard
        layout below when not given.
    path: string
        Absolute path to root-folder of the datapackage
    """
    # Build the default inside the function instead of using a mutable
    # default argument, so the dict is never shared across calls.
    if foreign_keys is None:
        foreign_keys = {
            "bus": [
                "volatile",
                "dispatchable",
                "storage",
                "load",
                "reservoir",
                "shortage",
                "excess",
            ],
            "profile": ["load", "volatile", "ror"],
            "from_to_bus": ["connection", "line", "conversion"],
            "chp": ["backpressure", "extraction", "chp"],
        }

    current_path = os.getcwd()
    if path:
        print("Setting current work directory to {}".format(path))
        os.chdir(path)

    try:
        p = Package()
        p.descriptor["name"] = package_name
        p.descriptor["profile"] = "tabular-data-package"
        p.commit()
        if not os.path.exists("resources"):
            os.makedirs("resources")

        # create meta data resources for elements
        if not os.path.exists("data/elements"):
            print("No data path found in directory {}. Skipping...".format(
                os.getcwd()))
        else:
            for f in os.listdir("data/elements"):
                r = Resource({"path": os.path.join("data/elements", f)})
                r.infer()
                r.descriptor["schema"]["primaryKey"] = "name"
                if r.name in foreign_keys.get("bus", []):
                    r.descriptor["schema"]["foreignKeys"] = [{
                        "fields": "bus",
                        "reference": {
                            "resource": "bus",
                            "fields": "name"
                        },
                    }]
                if r.name in foreign_keys.get("profile", []):
                    # setdefault guards against resources that carry a
                    # profile foreign key but are not in the 'bus' list
                    # (e.g. 'ror'), which previously raised a KeyError.
                    r.descriptor["schema"].setdefault(
                        "foreignKeys", []).append({
                            "fields": "profile",
                            "reference": {
                                "resource": r.name + "_profile"
                            },
                        })
                elif r.name in foreign_keys.get("from_to_bus", []):
                    r.descriptor["schema"]["foreignKeys"] = [
                        {
                            "fields": "from_bus",
                            "reference": {
                                "resource": "bus",
                                "fields": "name"
                            },
                        },
                        {
                            "fields": "to_bus",
                            "reference": {
                                "resource": "bus",
                                "fields": "name"
                            },
                        },
                    ]
                elif r.name in foreign_keys.get("chp", []):
                    r.descriptor["schema"]["foreignKeys"] = [
                        {
                            "fields": "fuel_bus",
                            "reference": {
                                "resource": "bus",
                                "fields": "name"
                            },
                        },
                        {
                            "fields": "electricity_bus",
                            "reference": {
                                "resource": "bus",
                                "fields": "name"
                            },
                        },
                        {
                            "fields": "heat_bus",
                            "reference": {
                                "resource": "bus",
                                "fields": "name"
                            },
                        },
                    ]
                r.commit()
                r.save(os.path.join("resources", f.replace(".csv", ".json")))
                p.add_resource(r.descriptor)

        # create meta data resources for sequences
        if not os.path.exists("data/sequences"):
            print("No data path found in directory {}. Skipping...".format(
                os.getcwd()))
        else:
            for f in os.listdir("data/sequences"):
                r = Resource({"path": os.path.join("data/sequences", f)})
                r.infer()
                r.commit()
                r.save(os.path.join("resources", f.replace(".csv", ".json")))
                p.add_resource(r.descriptor)

        p.commit()
        p.save("datapackage.json")

        if not keep_resources:
            shutil.rmtree("resources")
    finally:
        # Restore the caller's working directory even when an error occurs
        # above (the original left the process chdir'ed on failure).
        os.chdir(current_path)
'river': 'river in which the plant is located',
    'river_km': 'km from stream source',
    'level_meter': 'assigned level meter for flow curve'
}

# create resource
r = Resource({'path': 'data/runofriver.csv'})

# get basic metadata from data
r.infer()

# add description for fields based on mapper
# (each field name inferred from the CSV header must have an entry in
# description_mapper, otherwise this raises a KeyError)
for i in range(len(r.descriptor['schema']['fields'])):
    r.descriptor['schema']['fields'][i]['description'] = \
        description_mapper[r.descriptor['schema']['fields'][i]['name']]

# commit (apply) changes to resource
r.commit()

# save the resource
r.save('dataresource.json')

# create a package
p = Package()

# add the resource descriptor
p.add_resource(r.descriptor)

# save the package
p.save('datapackage.json')
# Tweak resource.descriptor['schema']['missingValues'] = 'N/A' resource.commit() resource.valid # False print(resource.errors) # Error: Descriptor validation error: # Invalid type: string (expected array) # at "/missingValues" in descriptor and # at "/properties/missingValues/type" in profile # Tweak-2 resource.descriptor['schema']['missingValues'] = ['', 'N/A'] resource.commit() print(resource.valid) # true # Read print(resource.read(keyed=True)) # [ # {city: 'london', location: [51.50,-0.11]}, # {city: 'paris', location: [48.85,2.30]}, # {city: 'rome', location: null}, # ] # Save resource.save('tmp/dataresource.json') # Open resource = Resource('tmp/dataresource.json', base_path='tmp') print(resource)