def create_indicator_package(self, schema, data_path, indicator_id, indicator):
    """Build a frictionless datapackage for a single indicator.

    Describes the file at *data_path*, applies package-level properties
    from *indicator*, then fixes the name/title and normalizes the
    'data' resource (its properties, schema, and canonical path).
    """
    pkg = describe_package(data_path)
    self.apply_package_properties(pkg, indicator)
    pkg.name = indicator_id
    pkg.title = indicator.get_name()
    res = pkg.get_resource('data')
    self.apply_resource_properties(res, indicator)
    res.schema = schema
    res.path = 'data.csv'
    return pkg
def test_package_to_copy():
    """A copied package is a distinct object that still compares equal."""
    original = describe_package("data/chunk*.csv")
    duplicate = original.to_copy()
    assert original is not duplicate
    assert original == duplicate
def get(url: str, dp: frictionless.package.Package, force: bool = False):
    """
    Retrieve data and check update.

    Parameters
    ----------
    url : str
        URL to retrieve the data from.
    dp : frictionless.package.Package
        Datapackage against which validating the data.
    force : bool, optional
        If True, new data will be uploaded even if the same as in the db.
        The default is False.

    Returns
    -------
    DataFrame
        Data in EnerMaps format, or None if the data is unchanged or
        failed schema validation.
    GeoDataFrame
        Spatial data in EnerMaps format, or None (same conditions).
    frictionless.package.Package
        Package describing the data.
    """
    ld = utilities.get_ld_json(url)
    csv_file = ld["distribution"][0]["contentUrl"]
    datePublished = ld["datePublished"]
    name = ld["name"].replace(" ", "_")

    # Inferring and completing metadata
    logging.info("Creating datapackage for input data")
    new_dp = frictionless.describe_package(
        csv_file,
        stats=True,  # Add stats
    )
    # Add date
    new_dp["datePublished"] = datePublished
    # Add missing values
    new_dp.resources[0]["schema"]["missingValues"] = ["NULL"]
    for field in VALUE_VARS:
        new_dp.resources[0].schema.get_field(field).type = "number"

    # BUGFIX: pre-initialize the outputs. Previously, if isValid()
    # returned False in any branch below, the final return raised
    # NameError because these names were never bound. Now an invalid
    # package yields (None, None, new_dp) instead of crashing.
    enermaps_data, spatial = None, None

    # Logic for update
    if dp is not None:  # Existing dataset
        # check stats
        isChangedStats = (
            dp["resources"][0]["stats"] != new_dp["resources"][0]["stats"]
        )
        isChangedDate = dp["datePublished"] != new_dp["datePublished"]

        if (
            isChangedStats or isChangedDate
        ):  # Data integration will continue, regardless of force argument
            logging.info("Data has changed")
            if isValid(dp, new_dp):
                enermaps_data, spatial = prepare(new_dp, name)
        elif force:
            # Data integration will continue, even if data has not changed
            logging.info("Forced update")
            if isValid(dp, new_dp):
                enermaps_data, spatial = prepare(new_dp, name)
        else:  # Data integration will stop here, returning Nones
            logging.info(
                "Data has not changed. Use --force if you want to reupload.")
            return None, None, None
    else:  # New dataset
        dp = new_dp  # this is just for the sake of the schema control
        if isValid(dp, new_dp):
            enermaps_data, spatial = prepare(new_dp, name)

    return enermaps_data, spatial, new_dp