def create_resource(path):
    """
    """
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor[
        'description'] = 'Contains the hubs (nodes) for the energy system representation'
    resource.descriptor[
        'title'] = 'Energy system hubs for DE and its electrical neighbours'
    resource.descriptor['sources'] = [{
        'title':
        'NUTS Shapefiles',
        'path':
        'http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/NUTS_2013_10M_SH.zip',
        'files': [
            'NUTS_2013_10M_SH/data/NUTS_RG_10M_2013.shp',
            'NUTS_2013_10M_SH/data/NUTS_RG_10M_2013.dbf'
        ]
    }]
    resource.commit()

    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor[
        'description'] = 'Installed transmission capacities from the e-highway 2050 scenario'
    resource.descriptor['title'] = 'Installed transmission capacities'
    resource.descriptor['sources'] = [{
        'title':
        'E-Highway 2050 transmission capacities',
        'path':
        'http://www.e-highway2050.eu/fileadmin/documents/' +
        'Results/e-Highway_database_per_country-08022016.xlsx'
    }]

    resource.descriptor['schema']['foreignKeys'] = [{
        "fields": "from_bus",
        "reference": {
            "resource": "bus",
            "fields": "name"
        }
    }, {
        "fields": "to_bus",
        "reference": {
            "resource": "bus",
            "fields": "name"
        }
    }]

    resource.commit()

    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
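A minimal usage sketch for the create_resource helpers above; the CSV path and the resources/ output directory are assumptions, not part of the original snippets:

# hypothetical driver for the create_resource() helper above
import os

os.makedirs('resources', exist_ok=True)        # output directory for the JSON descriptors
create_resource('data/elements/link.csv')      # infers the schema, adds metadata, saves the descriptor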
Example #3
class iterable_loader(DataStreamProcessor):
    def __init__(self, iterable, name=None):
        super(iterable_loader, self).__init__()
        self.iterable = iterable
        self.name = name

    def handle_iterable(self):
        mode = None
        for x in self.iterable:
            if mode is None:
                assert isinstance(x, (dict, list))
                mode = dict if isinstance(x, dict) else list
            assert isinstance(x, mode)
            if mode == dict:
                yield x
            else:
                yield dict(zip(('col{}'.format(i) for i in range(len(x))), x))

    def process_datapackage(self, dp: Package):
        name = self.name
        if name is None:
            name = 'res_{}'.format(len(dp.resources) + 1)
        self.res = Resource(dict(name=name, path='{}.csv'.format(name)),
                            storage=iterable_storage(self.handle_iterable()))
        self.res.infer()
        dp.descriptor.setdefault('resources', []).append(self.res.descriptor)
        return dp

    def process_resources(self, resources):
        yield from super(iterable_loader, self).process_resources(resources)
        yield self.res.iter(keyed=True)
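For context, dataflows wraps plain iterables passed to a Flow in a loader much like this one. A hedged sketch, assuming the dataflows package is installed:

from dataflows import Flow, printer

# a list of dicts becomes a named tabular resource via the iterable loader
Flow(
    [{'city': 'london', 'location': '51.50,-0.11'},
     {'city': 'paris', 'location': '48.85,2.30'}],
    printer(),
).process()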
Example #4
    def save_datasets_as_data_packages(self, folder_path):
        """ save each dataset from a data.json source as _datapackage_ """
        for dataset in self.datasets:
            package = Package()

            # TODO check this, I'm learning datapackages
            resource = Resource({'data': dataset})
            resource.infer()  # adds "name": "inline"

            # FIXME the identifier uses characters that are invalid in paths (e.g. /)
            # and duplicate paths could result from different resources;
            # use BASE64 or hashes instead
            idf = slugify(dataset['identifier'])

            resource_path = os.path.join(folder_path,
                                         f'resource_data_json_{idf}.json')
            if not resource.valid:
                raise Exception('Invalid resource')

            resource.save(resource_path)

            package.add_resource(descriptor=resource.descriptor)
            package_path = os.path.join(folder_path,
                                        f'pkg_data_json_{idf}.zip')
            package.save(target=package_path)
Example #5
def save_as_data_packages(row):
    """ save dataset from data.json as data package
        We will use this files as a queue to process later """
    # TODO check if ckanext-datapackager is useful for import
    # or export resources:
    # https://github.com/frictionlessdata/ckanext-datapackager

    package = Package()

    # TODO check this, I'm learning datapackages.
    resource = Resource({'data': row})
    resource.infer()  # adds "name": "inline"
    if not resource.valid:
        raise Exception('Invalid resource')

    encoded_identifier = encode_identifier(identifier=row['identifier'])

    # resource_path = os.path.join(path, f'{prefix}_{encoded_identifier}.json')
    # resource.save(resource_path)

    package.add_resource(descriptor=resource.descriptor)
    folder = config.get_data_packages_folder_path()
    filename = f'data-json-{encoded_identifier}.json'
    package_path = os.path.join(folder, filename)

    # do not rewrite if the file already exists
    if not os.path.isfile(package_path):
        package.save(target=package_path)
Example #6
    def save_datasets_as_data_packages(self, folder_path):
        """ save each dataset from a data.json source as _datapackage_ """
        for dataset in self.package_list:
            package = Package()

            # TODO check this, I'm learning datapackages
            resource = Resource({'data': dataset})
            resource.infer()  # adds "name": "inline"

            identifier = dataset['id']
            bytes_identifier = identifier.encode('utf-8')
            encoded = base64.b64encode(bytes_identifier)
            encoded_identifier = str(encoded, "utf-8")

            resource_path = os.path.join(
                folder_path, f'resource_ckan_api_{encoded_identifier}.json')
            if not resource.valid:
                raise Exception('Invalid resource')

            resource.save(resource_path)

            package.add_resource(descriptor=resource.descriptor)
            package_path = os.path.join(
                folder_path, f'pkg_ckan_api_{encoded_identifier}.zip')
            package.save(target=package_path)
Example #7
def create_resource(path):
    """
    """
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor[
        'description'] = 'Excess slacks for each electricity hub in the energy system representation'
    resource.descriptor[
        'title'] = 'Excess slacks for DE and its electrical neighbours'

    resource.descriptor['schema']['foreignKeys'] = [{
        "fields": "bus",
        "reference": {
            "resource": "bus",
            "fields": "name"
        }
    }]

    resource.commit()

    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
Example #8
def create_resource(path):
    """
    """

    mapper = {}

    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor[
        'description'] = 'Installed capacities, costs and technical parameters for components'
    resource.descriptor['title'] = '{} components'.format(
        resource.name.title())
    resource.descriptor['sources'] = [{
        'title':
        'E-Highway 2050 installed capacities',
        'path':
        'http://www.e-highway2050.eu/fileadmin/documents/Results/e-Highway2050_2050_Country_and_cluster_installed_capacities_31-03-2015.xlsx'
    }]

    resource.descriptor['schema']['foreignKeys'] = [{
        "fields": "bus",
        "reference": {
            "resource": "bus",
            "fields": "name"
        }
    }]

    if 'demand' in resource.name:
        resource.descriptor['schema']['foreignKeys'].append({
            "fields": "profile",
            "reference": {
                "resource": "demand-profiles"
            }
        })

    elif 'volatile-generator' in resource.name:
        resource.descriptor['schema']['foreignKeys'].append({
            "fields": "profile",
            "reference": {
                "resource": "generator-profiles"
            }
        })

    resource.commit()

    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
    else:
        print('Resource is not valid, writing resource anyway...')
        resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'timeindex'
    resource.descriptor['description'] = (
        'Profiles for Run of River (ROR) components. The profile is assumed' +
        ' to be constant during the year.')
    resource.descriptor['title'] = 'ROR profiles'
    resource.descriptor['sources'] = [{'title': 'Assumption'}]
    resource.commit()

    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
Example #10
def infer_resources(directory="data/elements"):
    """ Method looks at all files in `directory` and creates
    datapackage.Resource object that will be stored

    Parameters
    ----------
    directory: string
        Path to directory from where resources are inferred

    """
    if not os.path.exists("resources"):
        os.makedirs("resources")

    # create meta data resources
    for f in os.listdir(directory):
        r = Resource({"path": os.path.join(directory, f)})
        r.infer()
        r.save(os.path.join("resources", f.replace(".csv", ".json")))
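A short usage sketch for infer_resources; the directory layout is an assumption:

# hypothetical call: writes one JSON descriptor per CSV file into ./resources
infer_resources(directory="data/elements")
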
def create_resource(path):
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'timeindex'
    resource.descriptor['description'] = 'Demand profiles per country'
    resource.descriptor['title'] = 'Demand profiles'
    resource.descriptor['sources'] = [{
        'title':
        'OPSD timeseries',
        'path':
        'https://data.open-power-system-data.org/time_series/2017-07-09/' +
        'time_series_60min_singleindex.csv'
    }]
    resource.commit()

    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
def create_resource(path):
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'timeindex'
    resource.descriptor[
        'description'] = 'PV profiles (capacity factors) from renewables ninja for each country'
    resource.descriptor['title'] = 'PV profiles'
    resource.descriptor['sources'] = [{
        'title':
        'Renewables Ninja PV Capacity Factors',
        'path':
        'https://www.renewables.ninja/static/downloads/ninja_europe_pv_v1.1.zip'
    }]
    resource.commit()

    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
    def save_datasets_as_data_packages(self, folder_path, identifier_field):
        """ save each dataset from a data.json source as _datapackage_ """
        for dataset in self.datasets:
            package = Package()

            # TODO check this, I'm learning datapackages
            resource = Resource({'data': dataset})
            resource.infer()  # adds "name": "inline"

            idf = slugify(dataset[identifier_field])

            resource_path = os.path.join(folder_path, f'resource_data_json_{idf}.json')
            if not resource.valid:
                raise Exception('Invalid resource')

            resource.save(resource_path)

            package.add_resource(descriptor=resource.descriptor)
            package_path = os.path.join(folder_path, f'pkg_data_json_{idf}.zip')
            package.save(target=package_path)
Example #14
def create_resource(path, title):
    from datapackage import Resource
    resource = Resource({'path': path})
    resource.infer()
    resource.descriptor['schema']['primaryKey'] = 'name'
    resource.descriptor[
        'description'] = 'Installed capacities, costs and technical parameters for components'
    resource.descriptor['title'] = title
    resource.descriptor['sources'] = [{
        'title':
        'Restore 2050 hydro inflow timeseries',
        'path':
        'https://zenodo.org/record/804244/files/Hydro_Inflow.zip'
    }, {
        'title':
        'E-Highway 2050 installed capacities',
        'path':
        'http://www.e-highway2050.eu/fileadmin/documents/Results/e-Highway2050_2050_Country_and_cluster_installed_capacities_31-03-2015.xlsx'
    }, {
        'title':
        'DIW Berlin - Current and Prospective Costs of Electricity Generation until 2050',
        'path':
        'https://www.diw.de/documents/publikationen/73/diw_01.c.424566.de/diw_datadoc_2013-068.pdf'
    }]

    resource.descriptor['schema']['foreignKeys'] = [{
        "fields": "bus",
        "reference": {
            "resource": "bus",
            "fields": "name"
        }
    }]

    resource.commit()

    if resource.valid:
        resource.save('resources/' + resource.name + '.json')
Example #15
class load(DataStreamProcessor):
    def __init__(self,
                 load_source,
                 name=None,
                 resources=None,
                 validate=False,
                 strip=True,
                 **options):
        super(load, self).__init__()
        self.load_source = load_source
        self.options = options
        self.name = name
        self.resources = resources
        self.load_dp = None
        self.validate = validate
        self.strip = strip
        self.force_strings = options.get('force_strings') is True

    def process_datapackage(self, dp: Package):
        if isinstance(self.load_source, tuple):
            datapackage_descriptor, _ = self.load_source
            dp.descriptor.setdefault('resources', [])
            self.resource_matcher = ResourceMatcher(self.resources,
                                                    datapackage_descriptor)
            for resource_descriptor in datapackage_descriptor['resources']:
                if self.resource_matcher.match(resource_descriptor['name']):
                    dp.add_resource(resource_descriptor)
        else:  # load_source is string:
            if self.load_source.startswith('env://'):
                env_var = self.load_source[6:]
                self.load_source = os.environ.get(env_var)
                if self.load_source is None:
                    raise ValueError(
                        f"Couldn't find value for env var '{env_var}'")
            if os.path.basename(self.load_source) == 'datapackage.json':
                self.load_dp = Package(self.load_source)
                self.resource_matcher = ResourceMatcher(
                    self.resources, self.load_dp)
                dp.descriptor.setdefault('resources', [])
                for resource in self.load_dp.resources:
                    if self.resource_matcher.match(resource.name):
                        dp.add_resource(resource.descriptor)
            else:
                if os.path.exists(self.load_source):
                    base_path = os.path.dirname(self.load_source) or '.'
                    self.load_source = os.path.basename(self.load_source)
                else:
                    base_path = None
                descriptor = dict(path=self.load_source,
                                  profile='tabular-data-resource')
                descriptor['format'] = self.options.get('format')
                if 'encoding' in self.options:
                    descriptor['encoding'] = self.options['encoding']
                if descriptor['format'] == 'xml' or self.load_source.endswith(
                        '.xml'):
                    self.options.setdefault('custom_parsers',
                                            {})['xml'] = XMLParser
                self.options.setdefault('ignore_blank_headers', True)
                self.options.setdefault('headers', 1)
                self.res = Resource(descriptor,
                                    base_path=base_path,
                                    **self.options)
                self.res.infer(confidence=1, limit=1000)
                if self.name is not None:
                    self.res.descriptor['name'] = self.name
                if self.force_strings:
                    for f in self.res.descriptor['schema']['fields']:
                        f['type'] = 'string'
                self.res.commit()
                self.res.descriptor['path'] = '{name}.{format}'.format(
                    **self.res.descriptor)
                dp.add_resource(self.res.descriptor)
        return dp

    def stripper(self, iterator):
        for r in iterator:
            yield dict((k, v.strip()) if isinstance(v, str) else (k, v)
                       for k, v in r.items())

    def process_resources(self, resources):
        yield from super(load, self).process_resources(resources)
        if isinstance(self.load_source, tuple):
            datapackage_descriptor, resources = self.load_source
            yield from (resource for resource, descriptor in zip(
                resources, datapackage_descriptor['resources'])
                        if self.resource_matcher.match(descriptor['name']))
        elif self.load_dp is not None:
            yield from (resource.iter(keyed=True)
                        for resource in self.load_dp.resources
                        if self.resource_matcher.match(resource.name))
        else:
            it = self.res.iter(keyed=True, cast=False)
            if self.validate:
                it = schema_validator(self.res, it)
            if self.strip:
                it = self.stripper(it)
            yield it
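Assuming this is the dataflows load processor, a hedged usage sketch (the file name and output path are assumptions):

from dataflows import Flow, dump_to_path

# read a local CSV as a tabular resource named 'cities' and dump it to ./out
Flow(
    load('data/data.csv', name='cities'),
    dump_to_path('out'),
).process()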
Example #16
# field descriptions for the run-of-river data (the opening of this snippet is
# truncated in the source; the variable name is recovered from its use below)
description_mapper = {
    'total_flow': 'inflow to the power plant in mio m3',
    'flo_river_ror': 'next downstream res_nr',
    'status': 'operational status of the plant',
    'company': None,
    'turbtype': 'optional: turbine type',
    'geodbid': 'specified id for geo referencing',
    'river': 'river in which the plant is located',
    'river_km': 'km from stream source',
    'level_meter': 'assigned level meter for flow curve'
}

# create resource
r = Resource({'path': 'data/runofriver.csv'})

# get basic metadata from data
r.infer()

# add description for fields based on mapper
for i in range(len(r.descriptor['schema']['fields'])):
    r.descriptor['schema']['fields'][i]['description'] = \
        description_mapper[r.descriptor['schema']['fields'][i]['name']]

# commit (apply) changes to resource
r.commit()

# save the resource
r.save('dataresource.json')

# create a package
p = Package()
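A plausible continuation, not part of the original snippet: register the resource with the package and write the package descriptor.

# hypothetical continuation of the snippet above
p.add_resource(r.descriptor)   # attach the run-of-river resource
p.commit()                     # apply descriptor changes
p.save('datapackage.json')     # write the package descriptor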
Example #17
def temporal_clustering(datapackage, n, path="/tmp", how="daily"):
    """ Creates a new datapackage by aggregating sequences inside the
    `sequence` folder of the specified datapackage by clustering `n` timesteps

    Parameters
    ----------
    datapackage: string
        String of meta data file datapackage.json
    n: integer
        Number of clusters
    path: string
        Path to directory where the aggregated datapackage is stored
    how: string
        How to cluster 'daily' or 'hourly'
    """
    if how == "weekly":
        raise NotImplementedError("Weekly clustering is not implemented!")

    p = Package(datapackage)

    cwd = os.getcwd()

    copied_package_name = (p.descriptor["name"] + "__temporal_cluster__" +
                           how + "_" + str(n))

    copy_path = os.path.join(path, p.descriptor["name"], copied_package_name)

    copied_root = copy_datapackage(datapackage,
                                   os.path.abspath(copy_path),
                                   subset="data")

    sequence_resources = [
        r for r in p.resources
        if re.match(r"^data/sequences/.*$", r.descriptor["path"])
    ]

    dfs = {
        r.name:
        pd.DataFrame(r.read(keyed=True)).set_index("timeindex").astype(float)
        for r in sequence_resources
    }
    sequences = pd.concat(dfs.values(), axis=1)

    if how == "daily":
        hoursPerPeriod = 24
    elif how == "hourly":
        hoursPerPeriod = 1
    elif how == "weekly":
        hoursPerPeriod = 24 * 7

    aggregation = tsam.TimeSeriesAggregation(
        sequences,
        noTypicalPeriods=n,
        rescaleClusterPeriods=False,
        hoursPerPeriod=hoursPerPeriod,
        clusterMethod="hierarchical",
    )

    cluster_weights = {
        aggregation.clusterCenterIndices[n]: w
        for n, w in aggregation.clusterPeriodNoOccur.items()
    }
    if how == "daily":
        temporal = pd.Series(
            {
                d: cluster_weights[d.dayofyear]
                for d in sequences.index
                if d.dayofyear in aggregation.clusterCenterIndices
            },
            name="weighting",
        )
        temporal.index.name = "timeindex"

    elif how == "hourly":
        temporal = pd.Series(
            {
                h: cluster_weights[sequences.index.get_loc(h)]
                for h in sequences.index if sequences.index.get_loc(h) in
                aggregation.clusterCenterIndices
            },
            name="weighting",
        )
        temporal.index.name = "timeindex"

    # write resources to the copied package (should not interfere with the
    # meta data, as columns are not removed or sorted when written)
    os.chdir(copied_root)
    for r in sequence_resources:
        write_sequences(r.name + ".csv",
                        dfs[r.name].loc[temporal.index],
                        replace=True)

    # write temporal information from clustering
    temporal.to_csv(
        "data/temporal.csv",
        header=True,
        sep=";",
        date_format="%Y-%m-%dT%H:%M:%SZ",
    )
    # add meta data for new temporal information
    r = Resource({"path": "data/temporal.csv"})
    r.infer()
    # TODO: Add meta-data description
    r.descriptor[
        "description"] = "Temporal selection based on hierachical clustering..."

    # Update meta-data of copied package
    cp = Package("datapackage.json")
    cp.descriptor["name"] = copied_package_name
    cp.descriptor["resources"].append(r.descriptor)
    cp.commit()
    cp.save("datapackage.json")

    # set back to 'old' workdirectory
    os.chdir(cwd)

    return copied_root
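A short usage sketch; the datapackage path and the number of clusters are assumptions:

# hypothetical call: cluster the sequences into 10 typical days
clustered_root = temporal_clustering("datapackage.json", n=10, path="/tmp", how="daily")
print(clustered_root)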
Example #18
def temporal_skip(datapackage, n, path="/tmp", name=None, *args):
    """ Creates a new datapackage by aggregating sequences inside the
    `sequence` folder of the specified datapackage by skipping `n` timesteps

    Parameters
    ----------
    datapackage: string
        String of meta data file datapackage.json
    n: integer
        Number of timesteps to skip
    path: string
        Path to directory where the aggregated datapackage is stored
    name: string
        Name of the new, aggregated datapackage. If not specified a name will
        be given
    """
    p = Package(datapackage)

    cwd = os.getcwd()

    if name is None:
        copied_package_name = (p.descriptor["name"] + "__temporal_skip__" +
                               str(n))
    else:
        copied_package_name = name

    copy_path = os.path.join(path, copied_package_name)

    copied_root = copy_datapackage(datapackage,
                                   os.path.abspath(copy_path),
                                   subset="data")

    sequence_resources = [
        r for r in p.resources
        if re.match(r"^data/sequences/.*$", r.descriptor["path"])
    ]

    dfs = {
        r.name:
        pd.DataFrame(r.read(keyed=True)).set_index("timeindex").astype(float)
        for r in sequence_resources
    }
    sequences = pd.concat(dfs.values(), axis=1)

    skip_sequences = sequences.loc[::n]

    temporal = pd.Series(data=n, index=skip_sequences.index, name="weighting")
    temporal.index.name = "timeindex"

    os.chdir(copied_root)

    for r in sequence_resources:
        write_sequences(r.name + ".csv",
                        dfs[r.name].loc[temporal.index],
                        replace=True)

    # write temporal information for the skipped timesteps
    temporal.to_csv(
        "data/temporal.csv",
        header=True,
        sep=";",
        date_format="%Y-%m-%dT%H:%M:%SZ",
    )
    # add meta data for new temporal information
    r = Resource({"path": "data/temporal.csv"})
    r.infer()

    r.descriptor[
        "description"] = "Temporal selection based on skipped timesteps. Skipped n={}".format(
            n)

    # Update meta-data of copied package
    cp = Package("datapackage.json")
    cp.descriptor["name"] = copied_package_name
    cp.descriptor["resources"].append(r.descriptor)
    cp.commit()
    cp.save("datapackage.json")

    # set back to 'old' workdirectory
    os.chdir(cwd)

    return copied_root
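Analogously, a hedged usage sketch for temporal_skip; the arguments are assumptions:

# hypothetical call: keep every 4th timestep, each weighted by 4
skipped_root = temporal_skip("datapackage.json", n=4, path="/tmp", name="my-package__skip-4")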
Example #19
def infer_metadata(
    package_name="default-name",
    keep_resources=False,
    foreign_keys={
        "bus": [
            "volatile",
            "dispatchable",
            "storage",
            "load",
            "reservoir",
            "shortage",
            "excess",
        ],
        "profile": ["load", "volatile", "ror"],
        "from_to_bus": ["connection", "line", "conversion"],
        "chp": ["backpressure", "extraction", "chp"],
    },
    path=None,
):
    """ Add basic meta data for a datapackage

    Parameters
    ----------
    package_name: string
        Name of the data package
    keep_resources: boolean
        Flag indicating whether the resource meta data json-files should be
        kept after the main datapackage.json is created. The resource meta
        data will be stored in the `resources` directory.
    foreign_keys: dict
        Dictionary with the foreign key specification. Keys of the dictionary
        are: 'bus', 'profile', 'from_to_bus', 'chp'. Values are lists of
        resource names (strings)
    path: string
        Absolute path to the root-folder of the datapackage
    """
    current_path = os.getcwd()
    if path:
        print("Setting current work directory to {}".format(path))
        os.chdir(path)

    p = Package()
    p.descriptor["name"] = package_name
    p.descriptor["profile"] = "tabular-data-package"
    p.commit()
    if not os.path.exists("resources"):
        os.makedirs("resources")

    # create meta data resources for the elements
    if not os.path.exists("data/elements"):
        print("No data path found in directory {}. Skipping...".format(
            os.getcwd()))
    else:
        for f in os.listdir("data/elements"):
            r = Resource({"path": os.path.join("data/elements", f)})
            r.infer()
            r.descriptor["schema"]["primaryKey"] = "name"

            if r.name in foreign_keys.get("bus", []):
                r.descriptor["schema"]["foreignKeys"] = [{
                    "fields": "bus",
                    "reference": {
                        "resource": "bus",
                        "fields": "name"
                    },
                }]

                if r.name in foreign_keys.get("profile", []):
                    r.descriptor["schema"]["foreignKeys"].append({
                        "fields": "profile",
                        "reference": {
                            "resource": r.name + "_profile"
                        },
                    })

            elif r.name in foreign_keys.get("from_to_bus", []):
                r.descriptor["schema"]["foreignKeys"] = [
                    {
                        "fields": "from_bus",
                        "reference": {
                            "resource": "bus",
                            "fields": "name"
                        },
                    },
                    {
                        "fields": "to_bus",
                        "reference": {
                            "resource": "bus",
                            "fields": "name"
                        },
                    },
                ]

            elif r.name in foreign_keys.get("chp", []):
                r.descriptor["schema"]["foreignKeys"] = [
                    {
                        "fields": "fuel_bus",
                        "reference": {
                            "resource": "bus",
                            "fields": "name"
                        },
                    },
                    {
                        "fields": "electricity_bus",
                        "reference": {
                            "resource": "bus",
                            "fields": "name"
                        },
                    },
                    {
                        "fields": "heat_bus",
                        "reference": {
                            "resource": "bus",
                            "fields": "name"
                        },
                    },
                ]

            r.commit()
            r.save(os.path.join("resources", f.replace(".csv", ".json")))
            p.add_resource(r.descriptor)

    # create meta data resources for the sequences
    if not os.path.exists("data/sequences"):
        print("No data path found in directory {}. Skipping...".format(
            os.getcwd()))
    else:
        for f in os.listdir("data/sequences"):
            r = Resource({"path": os.path.join("data/sequences", f)})
            r.infer()
            r.commit()
            r.save(os.path.join("resources", f.replace(".csv", ".json")))
            p.add_resource(r.descriptor)

    p.commit()
    p.save("datapackage.json")

    if not keep_resources:
        shutil.rmtree("resources")

    os.chdir(current_path)
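A minimal usage sketch; the package name and path are assumptions, and the default foreign_keys mapping shown above is used:

# hypothetical call from outside the datapackage root folder
infer_metadata(package_name="my-energy-system",
               keep_resources=False,
               path="/path/to/datapackage")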
Example #20
from datapackage import Resource

# Create
resource = Resource({'path': 'data/data.csv'})
resource.tabular # True
resource.headers # ['city', 'location']
print(resource.read(keyed=True))
# [
#   {'city': 'london', 'location': '51.50,-0.11'},
#   {'city': 'paris', 'location': '48.85,2.30'},
#   {'city': 'rome', 'location': 'N/A'},
# ]

# Infer
resource.infer()
print(resource.descriptor)
# {'path': 'data.csv',
#  'profile': 'tabular-data-resource',
#  'encoding': 'utf-8',
#  'name': 'data',
#  'format': 'csv',
#  'mediatype': 'text/csv',
#  'schema': {'fields': [{...}, {...}], 'missingValues': ['']}}
# resource.read(keyed=True)
# Fails with a data validation error

# Tweak
resource.descriptor['schema']['missingValues'] = 'N/A'
resource.commit()
resource.valid # False
print(resource.errors)
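A hedged continuation, not part of the original excerpt: missingValues must be a list of strings, so giving it the expected form makes the descriptor valid again and lets 'N/A' be read as a missing value.

# fix: provide missingValues as a list of strings
resource.descriptor['schema']['missingValues'] = ['', 'N/A']
resource.commit()
resource.valid # True
print(resource.read(keyed=True))
# rome's 'N/A' location is now returned as None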