Example #1
0
 def test_get_data_at_different_levels(self):
     """Check that the first datapoint of the sample file has niva "kommun"."""
     xml_path = os.path.join(
         DATA_DIR, "exp_pers_amne_gy_kommun_amne_2018.xml")
     with open(xml_path) as xml_file:
         # Parse while the file is still open, exactly as the read happens.
         datapoints = list(get_data_from_xml(xml_file.read()))
     assert datapoints[0]["niva"] == "kommun"
Example #2
0
 def test_get_data_from_xml_with_uttag_dimension(self):
     """Check that the first parsed datapoint carries the expected "uttag"."""
     xml_path = os.path.join(
         DATA_DIR, "exp_pers_amne_gr_skola_2014_sample.xml")
     with open(xml_path) as xml_file:
         datapoints = list(get_data_from_xml(xml_file.read()))
     first = datapoints[0]
     assert first["uttag"] == "2015-08-17"
Example #3
0
 def test_get_data_from_xml_with_amne_dimension(self):
     """Check that the first parsed datapoint has the expected "amne" value."""
     xml_path = os.path.join(
         DATA_DIR, "exp_personal_alder_gr_kommun_2017_sample.xml")
     with open(xml_path) as xml_file:
         datapoints = list(get_data_from_xml(xml_file.read()))
     first = datapoints[0]
     # Presence is asserted separately so a missing key fails as an
     # assertion rather than as a KeyError.
     assert "amne" in first
     assert first["amne"] == u"Samtliga lärare"
Example #4
0
 def get_data_from_xml(self):
     """Check the datapoint count and first "niva" of the fklass sample.

     NOTE(review): the name has no ``test_`` prefix, so test discovery may
     skip this method -- confirm whether that is intentional. The
     ``get_data_from_xml`` called in the body is presumably the
     module-level parser rather than this method; verify.
     """
     xml_path = os.path.join(
         DATA_DIR, "exp_kostnader_kommun_fklass_2016.xml")
     with open(xml_path) as xml_file:
         datapoints = list(get_data_from_xml(xml_file.read()))
     assert len(datapoints) == 1740
     assert datapoints[0]["niva"] == "skola"
Example #5
0
    def _fetch_data(self, dataset, query):
        """Yield one ``Result`` per datapoint for the requested period(s).

        ``period`` is the only dimension that can be queried. It may be a
        single period, a list of periods, or ``"*"`` for every period in
        ``dataset.periods``. When the query omits it, ``dataset.latest_period[1]``
        is used.

        >>> dataset.fetch({"period": "2016"})
        >>> dataset.fetch({"period": ["2015", "2016"]})
        >>> dataset.fetch({"period": "*"}) # Get all periods

        Raises NotImplementedError if the query contains any other key.
        """
        # Start from the default and let the caller's query override it.
        params = {"period": dataset.latest_period[1]}
        params.update({} if query is None else query)

        unsupported = [dim for dim in params if dim not in ("period",)]
        if unsupported:
            # Report the first offending dimension only.
            msg = "Querying on {} is not implemented yet".format(
                unsupported[0])
            raise NotImplementedError(msg)

        requested = params["period"]
        if requested == "*":
            period_names = [entry[1] for entry in dataset.periods]
        elif isinstance(requested, list):
            period_names = requested
        else:
            period_names = [requested]

        # The XML url is built from period ids.
        period_ids = [dataset._get_period_id(name) for name in period_names]

        for period_id in period_ids:
            # Hack: For datasets with multiple uttag we get the latest
            # This should rather be a part of query
            uttag = (dataset.get_latest_uttag(period_id)[0]
                     if dataset.has_uttag else None)
            xml_data = self._get_html(dataset.get_xml_url(period_id, uttag))
            for datapoint in get_data_from_xml(xml_data):
                # Pull the value out; the remaining keys go along with it.
                value = datapoint.pop("value")
                yield Result(value, datapoint)