def test_get_data_at_different_levels(self):
    file_path = os.path.join(DATA_DIR, "exp_pers_amne_gy_kommun_amne_2018.xml")
    with open(file_path) as f:
        content = f.read()
    data = [x for x in get_data_from_xml(content)]
    assert data[0]["niva"] == "kommun"

def test_get_data_from_xml_with_uttag_dimension(self):
    file_path = os.path.join(DATA_DIR, "exp_pers_amne_gr_skola_2014_sample.xml")
    with open(file_path) as f:
        content = f.read()
    data = [x for x in get_data_from_xml(content)]
    assert data[0]["uttag"] == "2015-08-17"

def test_get_data_from_xml_with_amne_dimension(self):
    file_path = os.path.join(DATA_DIR, "exp_personal_alder_gr_kommun_2017_sample.xml")
    with open(file_path) as f:
        content = f.read()
    data = [x for x in get_data_from_xml(content)]
    assert "amne" in data[0]
    assert data[0]["amne"] == u"Samtliga lärare"

def test_get_data_from_xml(self):
    file_path = os.path.join(DATA_DIR, "exp_kostnader_kommun_fklass_2016.xml")
    with open(file_path) as f:
        content = f.read()
    data = [x for x in get_data_from_xml(content)]
    assert len(data) == 1740
    assert data[0]["niva"] == "skola"

def _fetch_data(self, dataset, query):
    """Make the actual query.

    The only queryable dimension is period.

    >>> dataset.fetch({"period": "2016"})
    >>> dataset.fetch({"period": ["2015", "2016"]})
    >>> dataset.fetch({"period": "*"})  # Get all periods
    """
    default_query = {
        "period": dataset.latest_period[1],
    }
    if query is None:
        query = {}
    default_query.update(query)
    query = default_query

    allowed_query_dims = ["period"]
    for dim in query.keys():
        if dim not in allowed_query_dims:
            msg = "Querying on {} is not implemented yet".format(dim)
            raise NotImplementedError(msg)

    if query["period"] == "*":
        periods = [x[1] for x in dataset.periods]
    else:
        if not isinstance(query["period"], list):
            periods = [query["period"]]
        else:
            periods = query["period"]

    # Get the period ids needed to build the url
    periods = [dataset._get_period_id(x) for x in periods]

    for period in periods:
        # Hack: for datasets with multiple uttag we get the latest.
        # This should rather be a part of the query.
        if dataset.has_uttag:
            uttag = dataset.get_latest_uttag(period)[0]
        else:
            uttag = None

        url = dataset.get_xml_url(period, uttag)
        xml_data = self._get_html(url)

        for datapoint in get_data_from_xml(xml_data):
            value = datapoint["value"]
            del datapoint["value"]
            yield Result(value, datapoint)
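
# A minimal usage sketch of the query interface described in the docstring
# above. The scraper entry point and dataset id used here are assumptions
# for illustration only; the {"period": ...} query format and the fact that
# fetch() yields Result objects carrying a value plus the remaining
# dimensions (e.g. "niva", "amne", "uttag") come from this module.
#
#     scraper = SkolverketScraper()  # hypothetical scraper class name
#     dataset = scraper.items["exp_kostnader_kommun_fklass"]  # hypothetical id
#     for result in dataset.fetch({"period": ["2015", "2016"]}):
#         print(result)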