Example #1
0
 def create_indicator_package(self, schema, data_path, indicator_id,
                              indicator):
     """Build and return a data package for a single indicator.

     The package is inferred from ``data_path``, then enriched with the
     indicator's properties; its single ``data`` resource gets the given
     schema and a canonical ``data.csv`` path.
     """
     pkg = describe_package(data_path)
     # Apply shared metadata first, then override identity fields.
     self.apply_package_properties(pkg, indicator)
     pkg.name = indicator_id
     pkg.title = indicator.get_name()
     res = pkg.get_resource('data')
     self.apply_resource_properties(res, indicator)
     res.schema = schema
     res.path = 'data.csv'
     return pkg
Example #2
0
def test_package_to_copy():
    """A copied package is a distinct object that still compares equal."""
    original = describe_package("data/chunk*.csv")
    duplicate = original.to_copy()
    assert duplicate is not original
    assert duplicate == original
Example #3
0
def get(url: str, dp: frictionless.package.Package, force: bool = False):
    """

    Retrieve data and check update.

    Parameters
    ----------
    url : str
        URL to retrieve the data from.
    dp : frictionless.package.Package
        Datapackage against which validating the data.
    force : Boolean, optional
        If True, new data will be uploaded even if the same as in the db. The default is False.

    Returns
    -------
    DataFrame
        Data in EnerMaps format, or None if there is nothing to upload.
    GeoDataFrame
        Spatial data in EnerMaps format, or None if there is nothing to upload.
    frictionless.package.Package
        Package describing the data, or None if there is nothing to upload.

    """
    ld = utilities.get_ld_json(url)
    csv_file = ld["distribution"][0]["contentUrl"]
    datePublished = ld["datePublished"]
    name = ld["name"].replace(" ", "_")

    # Inferring and completing metadata
    logging.info("Creating datapackage for input data")
    new_dp = frictionless.describe_package(
        csv_file,
        stats=True,
    )  # Add stats
    # Add date
    new_dp["datePublished"] = datePublished

    # Add missing values
    new_dp.resources[0]["schema"]["missingValues"] = ["NULL"]
    for field in VALUE_VARS:
        new_dp.resources[0].schema.get_field(field).type = "number"

    # Logic for update
    if dp is not None:  # Existing dataset
        # check stats and publication date against the stored datapackage
        isChangedStats = (
            dp["resources"][0]["stats"] != new_dp["resources"][0]["stats"]
        )
        isChangedDate = dp["datePublished"] != new_dp["datePublished"]

        if isChangedStats or isChangedDate:
            # Data integration continues, regardless of the force argument
            logging.info("Data has changed")
        elif force:
            # Data integration continues, even if data has not changed
            logging.info("Forced update")
        else:
            # Data integration stops here, returning Nones
            logging.info(
                "Data has not changed. Use --force if you want to reupload.")
            return None, None, None
    else:  # New dataset
        dp = new_dp  # this is just for the sake of the schema control

    # Single validation/preparation step shared by all continuing branches.
    # Bug fix: the original left enermaps_data/spatial unbound when
    # isValid() failed, raising UnboundLocalError at the final return.
    if not isValid(dp, new_dp):
        return None, None, None
    enermaps_data, spatial = prepare(new_dp, name)

    return enermaps_data, spatial, new_dp