Esempio n. 1
0
    def test_select_dimension(self):
        """Check select_dimension() for every choice strategy and for empty input."""

        # nosetests -s -v dlstats.tests.test_xml_utils:UtilsTestCase.test_select_dimension

        print()

        dimension_keys = ["A", "B", "C"]
        dimensions = {
            "A": {"a1": "a1 lib"},
            "B": {"b1": "b1 lib", "b2": "b2 lib"},
            "C": {"c1": "c1 lib", "c2": "c2 lib", "c3": "c3 lib"},
        }

        # (choice, expected key, expected position, expected sorted values)
        cases = [
            ("max", "C", 2, ["c1", "c2", "c3"]),
            ("min", "A", 0, ["a1"]),
            ("avg", "B", 1, ["b1", "b2"]),
            (None, "B", 1, ["b1", "b2"]),
        ]
        for choice, expected_key, expected_position, expected_values in cases:
            position, key, dimension_values = xml_utils.select_dimension(
                dimension_keys, dimensions, choice=choice)
            self.assertEqual(key, expected_key)
            self.assertEqual(position, expected_position)
            self.assertEqual(sorted(dimension_values), expected_values)

        # Degenerate case: no dimensions at all.
        position, key, dimension_values = xml_utils.select_dimension(
            [], {}, choice="min")
        self.assertEqual(key, None)
        self.assertEqual(position, 0)
        self.assertEqual(sorted(dimension_values), [])
Esempio n. 2
0
    def _get_data_by_dimension(self):
        """Yield ``(row, err)`` tuples for every series of the dataset.

        One dimension is chosen by select_dimension() and one SDMX data URL
        is downloaded per value of that dimension.  Ends with a final
        ``(None, None)`` sentinel so callers can detect exhaustion.
        """
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dsd_id,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(
            dimension_keys, dimensions)

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:

            # SDMX key: the chosen value at its position, wildcard elsewhere.
            key = get_key_for_dimension(count_dimensions, position,
                                        dimension_value)

            #http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (
                self.dataset_code, key)
            if not self._is_good_url(
                    url, good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
                print("bypass url[%s]" % url)
                continue

            headers = SDMX_DATA_HEADERS

            filename = "data-%s-%s.xml" % (self.dataset_code,
                                           key.replace(".", "_"))
            download = Downloader(
                url=url,
                filename=filename,
                store_filepath=self.store_path,
                headers=headers,
                use_existing_file=self.fetcher.use_existing_file,
                #client=self.fetcher.requests_client
            )
            filepath, response = download.get_filepath_and_response()

            if filepath and os.path.exists(filepath):
                self.fetcher.for_delete.append(filepath)
            else:
                # No usable file for this slice: skip it.
                continue

            if response is not None:
                self._add_url_cache(url, response.status_code)
                # BUG FIX: these checks were written "elif response and ..."
                # after an "if response:" branch, which made them unreachable.
                if response.status_code == HTTP_ERROR_NO_RESULT:
                    continue
                if response.status_code >= 400:
                    # raise_for_status() itself raises for >= 400 codes.
                    response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

        yield None, None
Esempio n. 3
0
    def _get_data_by_dimension(self):
        """Yield ``(row, err)`` tuples for every series of the dataset.

        One dimension is chosen by select_dimension() and one SDMX data URL
        is downloaded per value of that dimension.  Ends with a final
        ``(None, None)`` sentinel so callers can detect exhaustion.
        """
        self.xml_data = XMLData(
            provider_name=self.provider_name,
            dataset_code=self.dataset_code,
            xml_dsd=self.xml_dsd,
            dsd_id=self.dsd_id,
            frequencies_supported=FREQUENCIES_SUPPORTED,
        )

        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(dimension_keys, dimensions)

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:

            # SDMX key: the chosen value at its position, wildcard elsewhere.
            key = get_key_for_dimension(count_dimensions, position, dimension_value)

            # http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)
            if not self._is_good_url(url, good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
                print("bypass url[%s]" % url)
                continue

            headers = SDMX_DATA_HEADERS

            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(
                url=url,
                filename=filename,
                store_filepath=self.store_path,
                headers=headers,
                use_existing_file=self.fetcher.use_existing_file,
                # client=self.fetcher.requests_client
            )
            filepath, response = download.get_filepath_and_response()

            if filepath and os.path.exists(filepath):
                self.fetcher.for_delete.append(filepath)
            else:
                # No usable file for this slice: skip it.
                continue

            if response is not None:
                self._add_url_cache(url, response.status_code)
                # BUG FIX: these checks were written "elif response and ..."
                # after an "if response:" branch, which made them unreachable.
                if response.status_code == HTTP_ERROR_NO_RESULT:
                    continue
                if response.status_code >= 400:
                    # raise_for_status() itself raises for >= 400 codes.
                    response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

        yield None, None
Esempio n. 4
0
    def _get_data_by_dimension(self):
        """Yield ``(row, err)`` tuples for every series of the INSEE dataset.

        Splits the query on one dimension ("avg" by default, "max" for
        IPC-2015-COICOP, whose dimensions are very unbalanced) and downloads
        one SDMX data URL per value.  Ends with a ``(None, None)`` sentinel.
        """
        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        choice = "avg"
        if self.dataset_code in ["IPC-2015-COICOP"]:
            choice = "max"

        position, _key, dimension_values = select_dimension(dimension_keys,
                                                            dimensions,
                                                            choice=choice)

        count_dimensions = len(dimension_keys)

        logger.info("choice[%s] - filterkey[%s] - count[%s] - provider[%s] - dataset[%s]" % (choice, _key, len(dimension_values), self.provider_name, self.dataset_code))

        for dimension_value in dimension_values:
            # For each value of the selected dimension, build one URL key.

            key = get_key_for_dimension(count_dimensions, position, dimension_value)

            url = "http://www.bdm.insee.fr/series/sdmx/data/%s/%s" % (self.dataset_code, key)
            if self._is_good_url(url) is False:
                logger.warning("bypass not good url[%s]" % url)
                continue

            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  use_existing_file=self.fetcher.use_existing_file,
                                  #NOT USE FOR INSEE client=self.fetcher.requests_client
                                  )
            filepath, response = download.get_filepath_and_response()

            # PEP 8 fix: "not response is None" -> "response is not None".
            if response is not None:
                self._add_url_cache(url, response.status_code)

            if filepath and os.path.exists(filepath):
                self.fetcher.for_delete.append(filepath)
            else:
                # No usable file for this slice: skip it.
                continue

            if response and response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            elif response and response.status_code >= 400:
                # raise_for_status() raises by itself; no outer "raise" needed.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)

        yield None, None
Esempio n. 5
0
    def _get_data_by_dimension(self):
        """Yield ``(row, err)`` tuples for every series of the INSEE dataset.

        Splits the query on one dimension ("avg" by default, "max" for
        IPC-2015-COICOP, whose dimensions are very unbalanced) and downloads
        one SDMX data URL per value.  Ends with a ``(None, None)`` sentinel.
        """
        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        choice = "avg"
        if self.dataset_code in ["IPC-2015-COICOP"]:
            choice = "max"

        position, _key, dimension_values = select_dimension(dimension_keys,
                                                            dimensions,
                                                            choice=choice)

        count_dimensions = len(dimension_keys)

        logger.info("choice[%s] - filterkey[%s] - count[%s] - provider[%s] - dataset[%s]" % (choice, _key, len(dimension_values), self.provider_name, self.dataset_code))

        for dimension_value in dimension_values:
            # For each value of the selected dimension, build one URL key.

            key = get_key_for_dimension(count_dimensions, position, dimension_value)

            url = "http://www.bdm.insee.fr/series/sdmx/data/%s/%s" % (self.dataset_code, key)
            if self._is_good_url(url) is False:
                logger.warning("bypass not good url[%s]" % url)
                continue

            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  use_existing_file=self.fetcher.use_existing_file,
                                  #NOT USE FOR INSEE client=self.fetcher.requests_client
                                  )
            filepath, response = download.get_filepath_and_response()

            # PEP 8 fix: "not response is None" -> "response is not None".
            if response is not None:
                self._add_url_cache(url, response.status_code)

            if filepath and os.path.exists(filepath):
                self.fetcher.for_delete.append(filepath)
            else:
                # No usable file for this slice: skip it.
                continue

            if response and response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            elif response and response.status_code >= 400:
                # raise_for_status() raises by itself; no outer "raise" needed.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)

        yield None, None
Esempio n. 6
0
    def test_select_dimension(self):
        """Exercise select_dimension() with each choice mode and with empty input."""

        # nosetests -s -v dlstats.tests.test_xml_utils:UtilsTestCase.test_select_dimension

        print()

        dimension_keys = ["A", "B", "C"]
        dimensions = {
            "A": {"a1": "a1 lib"},
            "B": {"b1": "b1 lib", "b2": "b2 lib"},
            "C": {"c1": "c1 lib", "c2": "c2 lib", "c3": "c3 lib"},
        }

        def check(keys, dims, choice, expected_position, expected_key,
                  expected_values):
            # One call + the three assertions shared by every scenario.
            position, key, values = xml_utils.select_dimension(
                keys, dims, choice=choice)
            self.assertEqual(key, expected_key)
            self.assertEqual(position, expected_position)
            self.assertEqual(sorted(values), expected_values)

        check(dimension_keys, dimensions, "max", 2, "C", ["c1", "c2", "c3"])
        check(dimension_keys, dimensions, "min", 0, "A", ["a1"])
        check(dimension_keys, dimensions, "avg", 1, "B", ["b1", "b2"])
        check(dimension_keys, dimensions, None, 1, "B", ["b1", "b2"])
        # Degenerate case: no dimensions at all.
        check([], {}, "min", 0, None, [])
Esempio n. 7
0
    def _get_data_by_dimension(self):
        """Yield ``(row, err)`` tuples for every series of the IMF dataset.

        Splits the query on the dimension with the most values ("max") and
        downloads one data URL per value.  Ends with a ``(None, None)``
        sentinel.
        """
        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(dimension_keys,
                                                            dimensions,
                                                            choice="max")

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:
            # For each value of the selected dimension, build one URL key.

            local_count = 0

            # SDMX key: the chosen value at its position, wildcard elsewhere.
            key = "".join(dimension_value if i == position else "."
                          for i in range(count_dimensions))

            url = "%s/%s" % (self._get_url_data(), key)
            filename = "data-%s-%s.xml" % (self.dataset_code,
                                           key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            # Chained comparison replaces "x >= 400 and x < 500".
            if 400 <= response.status_code < 500:
                # Client error for this slice (typically no data): skip it.
                continue
            elif response.status_code >= 500:
                # raise_for_status() raises by itself; no outer "raise" needed.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err
                local_count += 1

            if local_count >= 2999:
                logger.warning(
                    "TODO: VRFY - series > 2999 for provider[IMF] - dataset[%s] - key[%s]"
                    % (self.dataset_code, key))

            #self.dataset.update_database(save_only=True)

        yield None, None
Esempio n. 8
0
    def _get_data_by_dimension(self):
        """Yield ``(row, err)`` tuples for every series of the INSEE dataset.

        Splits the query on one dimension ("avg" by default, "max" for
        IPC-2015-COICOP) and downloads one SDMX data URL per value.  Ends
        with a ``(None, None)`` sentinel.
        """
        # Continuation indentation fixed (was badly mis-aligned).
        dimension_keys, dimensions = get_dimensions_from_dsd(self.xml_dsd,
                                                             self.provider_name,
                                                             self.dataset_code)

        choice = "avg"
        if self.dataset_code in ["IPC-2015-COICOP"]:
            choice = "max"

        position, _key, dimension_values = select_dimension(dimension_keys,
                                                            dimensions,
                                                            choice=choice)

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:
            # For each value of the selected dimension, build one URL key.
            # SDMX key: the chosen value at its position, wildcard elsewhere.
            key = "".join(dimension_value if i == position else "."
                          for i in range(count_dimensions))

            url = "http://www.bdm.insee.fr/series/sdmx/data/%s/%s" % (self.dataset_code, key)
            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  #client=self.fetcher.requests_client
                                  )
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            if response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            elif response.status_code >= 400:
                # raise_for_status() raises by itself; no outer "raise" needed.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)

        yield None, None
Esempio n. 9
0
    def _get_data_by_dimension(self):
        """Yield ``(row, err)`` tuples for every series of the dataset.

        Splits the query on the dimension with the most values ("max") and
        downloads one data URL per value.  Ends with a ``(None, None)``
        sentinel.
        """
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(dimension_keys,
                                                            dimensions,
                                                            choice="max")

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:

            # SDMX key: the chosen value at its position, wildcard elsewhere.
            key = "".join(dimension_value if i == position else "."
                          for i in range(count_dimensions))

            url = "%s/%s" % (self._get_url_data(), key)
            filename = "data-%s-%s.xml" % (self.dataset_code,
                                           key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            # Chained comparison replaces "x >= 400 and x < 500".
            if 400 <= response.status_code < 500:
                # Client error for this slice (typically no data): skip it.
                continue
            elif response.status_code >= 500:
                # raise_for_status() raises by itself; no outer "raise" needed.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)

        yield None, None
Esempio n. 10
0
    def _get_data_by_dimension(self):
        """Yield ``(row, err)`` tuples for every series of the IMF dataset.

        Splits the query on the dimension with the most values ("max") and
        downloads one data URL per value.  Ends with a ``(None, None)``
        sentinel.
        """
        dimension_keys, dimensions = get_dimensions_from_dsd(self.xml_dsd,
                                                             self.provider_name,
                                                             self.dataset_code)

        position, _key, dimension_values = select_dimension(dimension_keys, dimensions, choice="max")

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:
            # For each value of the selected dimension, build one URL key.

            local_count = 0

            # SDMX key: the chosen value at its position, wildcard elsewhere.
            key = "".join(dimension_value if i == position else "."
                          for i in range(count_dimensions))

            url = "%s/%s" % (self._get_url_data(), key)
            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            # Chained comparison replaces "x >= 400 and x < 500".
            if 400 <= response.status_code < 500:
                # Client error for this slice (typically no data): skip it.
                continue
            elif response.status_code >= 500:
                # raise_for_status() raises by itself; no outer "raise" needed.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err
                local_count += 1

            if local_count >= 2999:
                logger.warning("TODO: VRFY - series > 2999 for provider[IMF] - dataset[%s] - key[%s]" % (self.dataset_code, key))

            #self.dataset.update_database(save_only=True)

        yield None, None
Esempio n. 11
0
    def _get_data_by_dimension(self):
        """Yield ``(row, err)`` tuples for every series of the dataset.

        Splits the query on the dimension with the most values ("max") and
        downloads one data URL per value.  Ends with a ``(None, None)``
        sentinel.
        """
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(dimension_keys, dimensions, choice="max")

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:

            # SDMX key: the chosen value at its position, wildcard elsewhere.
            key = "".join(dimension_value if i == position else "."
                          for i in range(count_dimensions))

            url = "%s/%s" % (self._get_url_data(), key)
            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  client=self.fetcher.requests_client
                                  )
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            # Chained comparison replaces "x >= 400 and x < 500".
            if 400 <= response.status_code < 500:
                # Client error for this slice (typically no data): skip it.
                continue
            elif response.status_code >= 500:
                # raise_for_status() raises by itself; no outer "raise" needed.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)

        yield None, None
Esempio n. 12
0
    def _get_data_by_dimension(self):
        """Yield ``(row, err)`` tuples for every series of the ECB dataset.

        One data URL is downloaded per value of the dimension chosen by
        select_dimension().  Requests are conditional: the dataset's stored
        "Last-Modified" metadata is sent as If-Modified-Since and refreshed
        from each response.  Ends with a ``(None, None)`` sentinel.
        """
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dimension_keys, dimensions = get_dimensions_from_dsd(self.xml_dsd,
                                                             self.provider_name,
                                                             self.dataset_code)

        position, _key, dimension_values = select_dimension(dimension_keys, dimensions)

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:

            # SDMX key: the chosen value at its position, wildcard elsewhere.
            key = "".join(dimension_value if i == position else "."
                          for i in range(count_dimensions))

            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)
            # BUG FIX: copy the shared dict -- the original assigned
            # "headers = SDMX_DATA_HEADERS" and then mutated it below, so the
            # If-Modified-Since entry leaked into the module-level constant
            # and into every other user of SDMX_DATA_HEADERS.
            headers = dict(SDMX_DATA_HEADERS)

            last_modified = None
            if self.dataset.metadata and "Last-Modified" in self.dataset.metadata:
                headers["If-Modified-Since"] = self.dataset.metadata["Last-Modified"]
                last_modified = self.dataset.metadata["Last-Modified"]

            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  headers=headers,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            if response.status_code == HTTP_ERROR_NOT_MODIFIED:
                msg = "Reject dataset updated for provider[%s] - dataset[%s] - update-date[%s]"
                logger.warning(msg % (self.provider_name, self.dataset_code, last_modified))
                continue

            elif response.status_code == HTTP_ERROR_NO_RESULT:
                continue

            elif response.status_code >= 400:
                # raise_for_status() raises by itself; no outer "raise" needed.
                response.raise_for_status()

            # Remember the server's Last-Modified for the next conditional run.
            if "Last-Modified" in response.headers:
                if not self.dataset.metadata:
                    self.dataset.metadata = {}
                self.dataset.metadata["Last-Modified"] = response.headers["Last-Modified"]

            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)

        yield None, None