def _get_data_by_dimension(self):
    """Yield ``(row, err)`` pairs for every series of the dataset.

    Iterates over the values of one selected dimension, builds one SDMX
    data URL per value, downloads the XML file and delegates parsing to
    ``self.xml_data.process()``. A final ``(None, None)`` sentinel marks
    the end of the stream.
    """
    self.xml_data = XMLData(provider_name=self.provider_name,
                            dataset_code=self.dataset_code,
                            xml_dsd=self.xml_dsd,
                            dsd_id=self.dsd_id,
                            frequencies_supported=FREQUENCIES_SUPPORTED)

    dimension_keys, dimensions = self._get_dimensions_from_dsd()

    position, _key, dimension_values = select_dimension(dimension_keys,
                                                        dimensions)

    count_dimensions = len(dimension_keys)

    for dimension_value in dimension_values:
        # Key with all positions wildcarded except the selected one,
        # e.g. http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
        key = get_key_for_dimension(count_dimensions, position,
                                    dimension_value)

        url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (
            self.dataset_code, key)
        if not self._is_good_url(url,
                                 good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
            # Consistent with the INSEE variant: log instead of print.
            logger.warning("bypass url[%s]" % url)
            continue

        headers = SDMX_DATA_HEADERS

        filename = "data-%s-%s.xml" % (self.dataset_code,
                                       key.replace(".", "_"))
        download = Downloader(url=url,
                              filename=filename,
                              store_filepath=self.store_path,
                              headers=headers,
                              use_existing_file=self.fetcher.use_existing_file)
        filepath, response = download.get_filepath_and_response()

        if filepath and os.path.exists(filepath):
            self.fetcher.for_delete.append(filepath)
        else:
            # No usable file for this dimension value: skip it.
            continue

        # BUGFIX: these used to be one if/elif chain with the cache
        # update first, which made the NO_RESULT and >= 400 branches
        # unreachable (they require a truthy response, but were only
        # tested when it was falsy). Keep the checks independent.
        if response is not None:
            self._add_url_cache(url, response.status_code)
            if response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            if response.status_code >= 400:
                # raise_for_status() itself raises HTTPError for >= 400;
                # a leading `raise` would be `raise None` (TypeError).
                response.raise_for_status()

        for row, err in self.xml_data.process(filepath):
            yield row, err

    yield None, None
def _get_data_by_dimension(self):
    """Yield ``(row, err)`` pairs for every series of the dataset.

    One HTTP request is made per value of the dimension selected by
    ``select_dimension``; each downloaded XML file is parsed by
    ``self.xml_data.process()``. Ends with a ``(None, None)`` sentinel.
    """
    self.xml_data = XMLData(
        provider_name=self.provider_name,
        dataset_code=self.dataset_code,
        xml_dsd=self.xml_dsd,
        dsd_id=self.dsd_id,
        frequencies_supported=FREQUENCIES_SUPPORTED,
    )

    dimension_keys, dimensions = self._get_dimensions_from_dsd()

    position, _key, dimension_values = select_dimension(dimension_keys,
                                                        dimensions)

    count_dimensions = len(dimension_keys)

    for dimension_value in dimension_values:
        # Key with every position wildcarded except the selected one,
        # e.g. http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
        key = get_key_for_dimension(count_dimensions, position,
                                    dimension_value)

        url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (
            self.dataset_code, key)
        if not self._is_good_url(url,
                                 good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
            # Consistent with the INSEE variant: log instead of print.
            logger.warning("bypass url[%s]" % url)
            continue

        headers = SDMX_DATA_HEADERS

        filename = "data-%s-%s.xml" % (self.dataset_code,
                                       key.replace(".", "_"))
        download = Downloader(
            url=url,
            filename=filename,
            store_filepath=self.store_path,
            headers=headers,
            use_existing_file=self.fetcher.use_existing_file,
        )
        filepath, response = download.get_filepath_and_response()

        if filepath and os.path.exists(filepath):
            self.fetcher.for_delete.append(filepath)
        else:
            # No usable file for this dimension value: skip it.
            continue

        # BUGFIX: these used to be one if/elif chain with the cache
        # update first, which made the NO_RESULT and >= 400 branches
        # unreachable (they require a truthy response, but were only
        # tested when it was falsy). Keep the checks independent.
        if response is not None:
            self._add_url_cache(url, response.status_code)
            if response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            if response.status_code >= 400:
                # raise_for_status() itself raises HTTPError for >= 400;
                # a leading `raise` would be `raise None` (TypeError).
                response.raise_for_status()

        for row, err in self.xml_data.process(filepath):
            yield row, err

    yield None, None
def _get_data_by_dimension(self):
    """Yield ``(row, err)`` pairs for every series of an INSEE dataset.

    One HTTP request is made per value of the dimension selected by
    ``select_dimension``; each downloaded XML file is parsed by
    ``self.xml_data.process()`` (assumed initialized elsewhere — TODO
    confirm against the caller). Ends with a ``(None, None)`` sentinel.
    """
    dimension_keys, dimensions = self._get_dimensions_from_dsd()

    # Default strategy splits on the "average" dimension; for
    # IPC-2015-COICOP the "max" dimension is used instead.
    choice = "avg"
    if self.dataset_code in ["IPC-2015-COICOP"]:
        choice = "max"

    position, _key, dimension_values = select_dimension(dimension_keys,
                                                        dimensions,
                                                        choice=choice)

    count_dimensions = len(dimension_keys)

    logger.info("choice[%s] - filterkey[%s] - count[%s] - provider[%s] - dataset[%s]" % (
        choice, _key, len(dimension_values),
        self.provider_name, self.dataset_code))

    for dimension_value in dimension_values:
        # For each value of the dimension, generate a URL key.
        key = get_key_for_dimension(count_dimensions, position,
                                    dimension_value)

        url = "http://www.bdm.insee.fr/series/sdmx/data/%s/%s" % (
            self.dataset_code, key)
        if self._is_good_url(url) is False:
            logger.warning("bypass not good url[%s]" % url)
            continue

        filename = "data-%s-%s.xml" % (self.dataset_code,
                                       key.replace(".", "_"))
        download = Downloader(url=url,
                              filename=filename,
                              store_filepath=self.store_path,
                              # not used for INSEE
                              use_existing_file=self.fetcher.use_existing_file,
                              client=self.fetcher.requests_client)
        filepath, response = download.get_filepath_and_response()

        # Record the status even when the download produced no file.
        if response is not None:
            self._add_url_cache(url, response.status_code)

        if filepath and os.path.exists(filepath):
            self.fetcher.for_delete.append(filepath)
        else:
            # No usable file for this dimension value: skip it.
            continue

        if response and response.status_code == HTTP_ERROR_NO_RESULT:
            continue
        elif response and response.status_code >= 400:
            # BUGFIX: was `raise response.raise_for_status()`.
            # raise_for_status() itself raises HTTPError for >= 400;
            # re-raising its (None) return value would be a TypeError.
            response.raise_for_status()

        for row, err in self.xml_data.process(filepath):
            yield row, err

    yield None, None