def create_resource(path):
    """Infer, annotate and save the metadata of the hub (bus) resource.

    The descriptor is inferred from the file at `path`, extended with a
    primary key, description, title and sources, and committed. Only if the
    resulting resource is valid, it is written to
    ``resources/<resource name>.json``; an invalid resource is skipped
    silently.

    Parameters
    ----------
    path: string
        Path of the data file the resource is created from
    """
    from datapackage import Resource

    resource = Resource({'path': path})
    resource.infer()

    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor.update({
        'description':
        'Contains the hubs (nodes) for the energy system representation',
        'title':
        'Energy system hubs for DE and its electrical neighbours',
        'sources': [{
            'title':
            'NUTS Shapefiles',
            'path':
            'http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/NUTS_2013_10M_SH.zip',
            'files': [
                'NUTS_2013_10M_SH/data/NUTS_RG_10M_2013.shp',
                'NUTS_2013_10M_SH/data/NUTS_RG_10M_2013.dbf',
            ],
        }],
    })

    # Apply the pending descriptor changes before checking validity.
    resource.commit()

    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    """Infer, annotate and save the metadata of the transmission resource.

    The descriptor is inferred from the file at `path`, extended with a
    primary key, description, title, sources and the foreign keys of the
    `from_bus` / `to_bus` columns, and committed. The resource is written to
    ``resources/<resource name>.json`` only if it is valid.

    Parameters
    ----------
    path: string
        Path of the data file the resource is created from
    """
    from datapackage import Resource

    resource = Resource({'path': path})
    resource.infer()

    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor.update({
        'description':
        'Installed transmission capacities from the e-highway 2050 scenario',
        'title':
        'Installed transmission capacities',
        'sources': [{
            'title':
            'E-Highway 2050 transmission capacities',
            'path':
            ('http://www.e-highway2050.eu/fileadmin/documents/'
             'Results/e-Highway_database_per_country-08022016.xlsx'),
        }],
    })

    # Both bus columns reference the `name` column of the `bus` resource.
    resource.descriptor['schema']['foreignKeys'] = [
        {
            "fields": column,
            "reference": {
                "resource": "bus",
                "fields": "name"
            }
        }
        for column in ("from_bus", "to_bus")
    ]

    resource.commit()

    if not resource.valid:
        return
    resource.save('resources/' + resource.name + '.json')
Example #3
0
def create_resource(path):
    """Infer, annotate and save the metadata of the excess slack resource.

    The descriptor is inferred from the file at `path`, extended with a
    primary key, description, title and the foreign key of the `bus` column,
    and committed. Only if the resulting resource is valid, it is written to
    ``resources/<resource name>.json``; an invalid resource is skipped
    silently.

    Parameters
    ----------
    path: string
        Path of the data file the resource is created from
    """
    from datapackage import Resource

    resource = Resource({'path': path})
    resource.infer()

    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor.update({
        'description':
        'Excess slacks for each electricity hub in the energy system representation',
        'title':
        'Excess slacks for DE and its electrical neighbours',
    })

    # The `bus` column references the `name` column of the `bus` resource.
    resource.descriptor['schema']['foreignKeys'] = [{
        "fields": "bus",
        "reference": {
            "resource": "bus",
            "fields": "name"
        }
    }]

    # Apply the pending descriptor changes before checking validity.
    resource.commit()

    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
Example #4
0
def create_resource(path):
    """Infer, annotate and save the metadata of a component resource.

    The descriptor is inferred from the file at `path`, extended with a
    primary key, description, a title derived from the resource name,
    sources and foreign keys, and committed. The resource is always written
    to ``resources/<resource name>.json``; if it is not valid a notice is
    printed first.

    Parameters
    ----------
    path: string
        Path of the data file the resource is created from
    """
    from datapackage import Resource

    resource = Resource({'path': path})
    resource.infer()

    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor[
        'description'] = 'Installed capacities, costs and technical parameters for components'
    resource.descriptor['title'] = '{} components'.format(
        resource.name.title())
    resource.descriptor['sources'] = [{
        'title':
        'E-Highway 2050 installed capacities',
        'path':
        'http://www.e-highway2050.eu/fileadmin/documents/Results/e-Highway2050_2050_Country_and_cluster_installed_capacities_31-03-2015.xlsx'
    }]

    foreign_keys = [{
        "fields": "bus",
        "reference": {
            "resource": "bus",
            "fields": "name"
        }
    }]

    # Resources whose name contains one of these keywords additionally
    # reference a profile resource; the first matching keyword wins.
    profile_resources = (
        ('demand', 'demand-profiles'),
        ('volatile-generator', 'generator-profiles'),
    )
    for keyword, profile_resource in profile_resources:
        if keyword in resource.name:
            foreign_keys.append({
                "fields": "profile",
                "reference": {
                    "resource": profile_resource
                }
            })
            break

    resource.descriptor['schema']['foreignKeys'] = foreign_keys

    resource.commit()

    # The descriptor is written in either case; invalid ones only get a
    # notice on stdout.
    if not resource.valid:
        print('Resource is not valid, writing resource anyway...')
    resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    """Infer, annotate and save the metadata of the ROR profile resource.

    The descriptor is inferred from the file at `path`, extended with a
    primary key, description, title and sources, and committed. The resource
    is written to ``resources/<resource name>.json`` only if it is valid.

    Parameters
    ----------
    path: string
        Path of the data file the resource is created from
    """
    from datapackage import Resource

    resource = Resource({'path': path})
    resource.infer()

    resource.descriptor['schema']['primaryKey'] = 'timeindex'
    resource.descriptor.update({
        'description':
        ('Profiles for Run of River (ROR) components. The profile is assumed'
         ' to be constant during the year.'),
        'title':
        'ROR profiles',
        'sources': [{'title': 'Assumption'}],
    })
    resource.commit()

    if not resource.valid:
        return
    resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    """Infer, annotate and save the metadata of the PV profile resource.

    The descriptor is inferred from the file at `path`, extended with a
    primary key, description, title and sources, and committed. The resource
    is written to ``resources/<resource name>.json`` only if it is valid.

    Parameters
    ----------
    path: string
        Path of the data file the resource is created from
    """
    from datapackage import Resource

    resource = Resource({'path': path})
    resource.infer()

    resource.descriptor['schema']['primaryKey'] = 'timeindex'
    resource.descriptor.update({
        'description':
        'PV profiles (capacity factors) from renewables ninja for each country',
        'title':
        'PV profiles',
        'sources': [{
            'title':
            'Renewables Ninja PV Capacity Factors',
            'path':
            'https://www.renewables.ninja/static/downloads/ninja_europe_pv_v1.1.zip',
        }],
    })
    resource.commit()

    if not resource.valid:
        return
    resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    """Infer, annotate and save the metadata of the demand profile resource.

    The descriptor is inferred from the file at `path`, extended with a
    primary key, description, title and sources, and committed. The resource
    is written to ``resources/<resource name>.json`` only if it is valid.

    Parameters
    ----------
    path: string
        Path of the data file the resource is created from
    """
    from datapackage import Resource

    resource = Resource({'path': path})
    resource.infer()

    resource.descriptor['schema']['primaryKey'] = 'timeindex'
    resource.descriptor.update({
        'description':
        'Demand profiles per country',
        'title':
        'Demand profiles',
        'sources': [{
            'title':
            'OPSD timeseries',
            'path':
            ('https://data.open-power-system-data.org/time_series/2017-07-09/'
             'time_series_60min_singleindex.csv'),
        }],
    })
    resource.commit()

    if not resource.valid:
        return
    resource.save('resources/' + resource.name + '.json')
Example #8
0
def create_resource(path, title):
    """Infer, annotate and save the metadata of a component resource.

    The descriptor is inferred from the file at `path`, extended with a
    primary key, description, the given title, sources and the foreign key
    of the `bus` column, and committed. The resource is written to
    ``resources/<resource name>.json`` only if it is valid.

    Parameters
    ----------
    path: string
        Path of the data file the resource is created from
    title: string
        Title stored in the resource descriptor
    """
    from datapackage import Resource

    resource = Resource({'path': path})
    resource.infer()

    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor.update({
        'description':
        'Installed capacities, costs and technical parameters for components',
        'title':
        title,
        'sources': [
            {
                'title':
                'Restore 2050 hydro inflow timeseries',
                'path':
                'https://zenodo.org/record/804244/files/Hydro_Inflow.zip',
            },
            {
                'title':
                'E-Highway 2050 installed capacities',
                'path':
                'http://www.e-highway2050.eu/fileadmin/documents/Results/e-Highway2050_2050_Country_and_cluster_installed_capacities_31-03-2015.xlsx',
            },
            {
                'title':
                'DIW Berlin - Current and Prospective Costs of Electricity Generation until 2050',
                'path':
                'https://www.diw.de/documents/publikationen/73/diw_01.c.424566.de/diw_datadoc_2013-068.pdf',
            },
        ],
    })

    # The `bus` column references the `name` column of the `bus` resource.
    bus_reference = {"resource": "bus", "fields": "name"}
    resource.descriptor['schema']['foreignKeys'] = [
        {"fields": "bus", "reference": bus_reference},
    ]

    resource.commit()

    if not resource.valid:
        return
    resource.save('resources/' + resource.name + '.json')
Example #9
0
def _bus_foreign_key(field):
    """Return a foreign key linking column `field` to `name` of `bus`."""
    return {
        "fields": field,
        "reference": {
            "resource": "bus",
            "fields": "name"
        },
    }


def infer_metadata(
    package_name="default-name",
    keep_resources=False,
    foreign_keys={
        "bus": [
            "volatile",
            "dispatchable",
            "storage",
            "load",
            "reservoir",
            "shortage",
            "excess",
        ],
        "profile": ["load", "volatile", "ror"],
        "from_to_bus": ["connection", "line", "conversion"],
        "chp": ["backpressure", "extraction", "chp"],
    },
    path=None,
):
    """ Add basic meta data for a datapackage

    The meta data of every file in `data/elements` and `data/sequences`
    (relative to `path`, or to the current working directory if `path` is
    not given) is inferred and collected in a `datapackage.json` file.
    The original working directory is restored when the function returns
    or raises.

    Parameters
    ----------
    package_name: string
        Name of the data package
    keep_resources: boolean
        Flag indicating if the resources meta data json-files should be kept
        after main datapackage.json is created. The resource meta data will
        be stored in the `resources` directory.
    foreign_keys: dict
        Dictionary with foreign key specification. Keys for dictionary are:
        'bus', 'profile', 'from_to_bus', 'chp'. Values are list with
        strings with the name of the resources. The dictionary is only
        read, never modified.
    path: string
        Absolute path to root-folder of the datapackage
    """
    current_path = os.getcwd()
    if path:
        print("Setting current work directory to {}".format(path))
        os.chdir(path)

    # Everything below works on relative paths, so the working directory
    # has to be restored even if one of the steps fails.
    try:
        p = Package()
        p.descriptor["name"] = package_name
        p.descriptor["profile"] = "tabular-data-package"
        p.commit()
        if not os.path.exists("resources"):
            os.makedirs("resources")

        # create meta data resources elements
        if not os.path.exists("data/elements"):
            print("No data path found in directory {}. Skipping...".format(
                os.getcwd()))
        else:
            for f in os.listdir("data/elements"):
                r = Resource({"path": os.path.join("data/elements", f)})
                r.infer()
                r.descriptor["schema"]["primaryKey"] = "name"

                if r.name in foreign_keys.get("bus", []):
                    r.descriptor["schema"]["foreignKeys"] = [
                        _bus_foreign_key("bus")
                    ]

                    # NOTE(review): the profile key is only considered for
                    # resources that are also listed under 'bus' - confirm
                    # that this nesting is intended.
                    if r.name in foreign_keys.get("profile", []):
                        r.descriptor["schema"]["foreignKeys"].append({
                            "fields": "profile",
                            "reference": {
                                "resource": r.name + "_profile"
                            },
                        })

                elif r.name in foreign_keys.get("from_to_bus", []):
                    r.descriptor["schema"]["foreignKeys"] = [
                        _bus_foreign_key(field)
                        for field in ("from_bus", "to_bus")
                    ]

                elif r.name in foreign_keys.get("chp", []):
                    r.descriptor["schema"]["foreignKeys"] = [
                        _bus_foreign_key(field)
                        for field in ("fuel_bus", "electricity_bus",
                                      "heat_bus")
                    ]

                r.commit()
                r.save(os.path.join("resources", f.replace(".csv", ".json")))
                p.add_resource(r.descriptor)

        # create meta data resources sequences
        if not os.path.exists("data/sequences"):
            print("No data path found in directory {}. Skipping...".format(
                os.getcwd()))
        else:
            for f in os.listdir("data/sequences"):
                r = Resource({"path": os.path.join("data/sequences", f)})
                r.infer()
                r.commit()
                r.save(os.path.join("resources", f.replace(".csv", ".json")))
                p.add_resource(r.descriptor)

        p.commit()
        p.save("datapackage.json")

        if not keep_resources:
            shutil.rmtree("resources")
    finally:
        os.chdir(current_path)
Example #10
0
    'river': 'river in which the plant is located',
    'river_km': 'km from stream source',
    'level_meter': 'assigned level meter for flow curve'
}

# build the resource from the run-of-river data file
r = Resource({'path': 'data/runofriver.csv'})

# let the library derive the basic metadata from the data
r.infer()

# attach the description from the mapper to every field of the schema
for field in r.descriptor['schema']['fields']:
    field['description'] = description_mapper[field['name']]

# commit (apply) the pending changes to the resource
r.commit()

# write the resource descriptor to disk
r.save('dataresource.json')

# create an empty package, add the resource descriptor and save it
p = Package()
p.add_resource(r.descriptor)
p.save('datapackage.json')
Example #11
0
# Infer: let the library derive the descriptor from the data
# NOTE(review): `resource` is created before this excerpt - confirm it
# points at 'data.csv' as the printed descriptor below suggests.
resource.infer()
print(resource.descriptor)
# Printed descriptor (abbreviated):
#{ path: 'data.csv',
#  profile: 'tabular-data-resource',
#  encoding: 'utf-8',
#  name: 'data',
#  format: 'csv',
#  mediatype: 'text/csv',
# schema: { fields: [ [Object], [Object] ], missingValues: [ '' ] } }
# resource.read(keyed=True)
# Fails with a data validation error

# Tweak: deliberately set `missingValues` to a string instead of a list to
# show how an invalid descriptor is reported
resource.descriptor['schema']['missingValues'] = 'N/A'
resource.commit()
resource.valid # False
print(resource.errors)
# Error: Descriptor validation error:
#   Invalid type: string (expected array)
#    at "/missingValues" in descriptor and
#    at "/properties/missingValues/type" in profile

# Tweak-2: provide `missingValues` as a list, which makes the descriptor
# valid again
resource.descriptor['schema']['missingValues'] = ['', 'N/A']
resource.commit()
print(resource.valid) # true

# Read the rows as keyed records
print(resource.read(keyed=True))
# [
Example #12
0
class load(DataStreamProcessor):
    """Processor adding resources from `load_source` to the processed package.

    `load_source` is handled in one of three ways:

    * a tuple ``(datapackage_descriptor, resources)`` - the matching
      resource descriptors and resources are passed on,
    * a string whose base name is ``datapackage.json`` - the package is
      opened and its matching resources are passed on,
    * any other string - it is treated as the path of a single tabular
      resource whose descriptor is inferred.

    A string of the form ``env://NAME`` is first replaced by the value of
    the environment variable ``NAME``.
    """

    def __init__(self,
                 load_source,
                 name=None,
                 resources=None,
                 validate=False,
                 strip=True,
                 **options):
        """Store the configuration; nothing is loaded at this point.

        Parameters
        ----------
        load_source: tuple or string
            Source to load from, see the class description
        name: string
            Name given to the resource when a single file is loaded
        resources:
            Selection handed to `ResourceMatcher` to pick the resources
            that are loaded from a tuple or a datapackage
        validate: boolean
            If True, rows of a single loaded file are passed through
            `schema_validator`
        strip: boolean
            If True, string values of a single loaded file are stripped
        options:
            Additional keyword arguments handed to `Resource`; `format`,
            `encoding` and `force_strings` are also read here
        """
        super().__init__()
        self.load_source = load_source
        self.name = name
        self.resources = resources
        self.validate = validate
        self.strip = strip
        self.options = options
        self.load_dp = None
        # Only the literal value True switches string forcing on.
        self.force_strings = options.get('force_strings') is True

    def process_datapackage(self, dp: Package):
        """Add the descriptors of the loaded resources to `dp` and return it.

        Raises `ValueError` if an ``env://`` source refers to an environment
        variable that is not set.
        """
        # Case 1: in-memory (descriptor, resources) pair.
        if isinstance(self.load_source, tuple):
            source_descriptor, _ = self.load_source
            dp.descriptor.setdefault('resources', [])
            self.resource_matcher = ResourceMatcher(self.resources,
                                                    source_descriptor)
            for candidate in source_descriptor['resources']:
                if self.resource_matcher.match(candidate['name']):
                    dp.add_resource(candidate)
            return dp

        # From here on the source is a string; resolve `env://` indirection.
        if self.load_source.startswith('env://'):
            variable = self.load_source[6:]
            self.load_source = os.environ.get(variable)
            if self.load_source is None:
                raise ValueError(
                    f"Couldn't find value for env var '{variable}'")

        # Case 2: an existing datapackage descriptor file.
        if os.path.basename(self.load_source) == 'datapackage.json':
            self.load_dp = Package(self.load_source)
            self.resource_matcher = ResourceMatcher(self.resources,
                                                    self.load_dp)
            dp.descriptor.setdefault('resources', [])
            for loaded in self.load_dp.resources:
                if self.resource_matcher.match(loaded.name):
                    dp.add_resource(loaded.descriptor)
            return dp

        # Case 3: a single tabular file; local files are addressed relative
        # to their directory.
        if os.path.exists(self.load_source):
            base_path = os.path.dirname(self.load_source) or '.'
            self.load_source = os.path.basename(self.load_source)
        else:
            base_path = None

        descriptor = {
            'path': self.load_source,
            'profile': 'tabular-data-resource',
            'format': self.options.get('format'),
        }
        if 'encoding' in self.options:
            descriptor['encoding'] = self.options['encoding']

        is_xml = (descriptor['format'] == 'xml'
                  or self.load_source.endswith('.xml'))
        if is_xml:
            parsers = self.options.setdefault('custom_parsers', {})
            parsers['xml'] = XMLParser

        # Defaults are only applied where the caller gave no value.
        self.options.setdefault('ignore_blank_headers', True)
        self.options.setdefault('headers', 1)

        self.res = Resource(descriptor, base_path=base_path, **self.options)
        self.res.infer(confidence=1, limit=1000)
        if self.name is not None:
            self.res.descriptor['name'] = self.name
        if self.force_strings:
            for field in self.res.descriptor['schema']['fields']:
                field['type'] = 'string'
        self.res.commit()

        # The path recorded in the descriptor is rebuilt from the resource
        # name and format.
        self.res.descriptor['path'] = '{name}.{format}'.format(
            **self.res.descriptor)
        dp.add_resource(self.res.descriptor)
        return dp

    def stripper(self, iterator):
        """Yield a copy of every row with its string values stripped."""
        for row in iterator:
            yield {
                key: value.strip() if isinstance(value, str) else value
                for key, value in row.items()
            }

    def process_resources(self, resources):
        """Yield the incoming resources followed by the loaded ones."""
        yield from super().process_resources(resources)

        if isinstance(self.load_source, tuple):
            source_descriptor, source_resources = self.load_source
            pairs = zip(source_resources, source_descriptor['resources'])
            for rows, candidate in pairs:
                if self.resource_matcher.match(candidate['name']):
                    yield rows
            return

        if self.load_dp is not None:
            for loaded in self.load_dp.resources:
                if self.resource_matcher.match(loaded.name):
                    yield loaded.iter(keyed=True)
            return

        # Single file: rows are read uncast and optionally validated and
        # stripped, in that order.
        rows = self.res.iter(keyed=True, cast=False)
        if self.validate:
            rows = schema_validator(self.res, rows)
        if self.strip:
            rows = self.stripper(rows)
        yield rows