def _get_data_by_dimension(self):
    """Yield (series, error) tuples, issuing one SDMX data request per value
    of the selected dimension.

    A wildcard key is built for each value of the dimension chosen by
    ``select_dimension`` (all other positions are "."), the matching data
    file is downloaded, and parsed rows are streamed.  The generator ends
    with a ``(None, None)`` sentinel.
    """
    self.xml_data = XMLData(provider_name=self.provider_name,
                            dataset_code=self.dataset_code,
                            xml_dsd=self.xml_dsd,
                            dsd_id=self.dsd_id,
                            frequencies_supported=FREQUENCIES_SUPPORTED)

    dimension_keys, dimensions = self._get_dimensions_from_dsd()

    position, _key, dimension_values = select_dimension(dimension_keys,
                                                        dimensions)

    count_dimensions = len(dimension_keys)

    for dimension_value in dimension_values:
        # Example resulting URL:
        # http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
        key = get_key_for_dimension(count_dimensions, position,
                                    dimension_value)

        url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (
            self.dataset_code, key)
        if not self._is_good_url(url,
                                 good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
            print("bypass url[%s]" % url)
            continue

        headers = SDMX_DATA_HEADERS

        filename = "data-%s-%s.xml" % (self.dataset_code,
                                       key.replace(".", "_"))
        download = Downloader(
            url=url,
            filename=filename,
            store_filepath=self.store_path,
            headers=headers,
            use_existing_file=self.fetcher.use_existing_file,
            #client=self.fetcher.requests_client
        )
        filepath, response = download.get_filepath_and_response()

        if filepath and os.path.exists(filepath):
            self.fetcher.for_delete.append(filepath)
        else:
            # Download produced no usable file: skip this dimension value.
            continue

        if response:
            self._add_url_cache(url, response.status_code)
            # BUG FIX: these status-code checks were written as
            # `elif response and ...` after `if response:` and could
            # never execute.
            if response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            if response.status_code >= 400:
                # BUG FIX: raise_for_status() raises HTTPError itself;
                # `raise response.raise_for_status()` raised None (TypeError).
                response.raise_for_status()

        for row, err in self.xml_data.process(filepath):
            yield row, err

    yield None, None
def _get_data_by_dimension(self):
    """Yield (series, error) tuples, one SDMX data request per value of the
    dimension chosen by ``select_dimension``.

    Each request uses a wildcard key ("." in every other position), and the
    parsed rows from the downloaded file are streamed.  A ``(None, None)``
    sentinel terminates the generator.
    """
    self.xml_data = XMLData(
        provider_name=self.provider_name,
        dataset_code=self.dataset_code,
        xml_dsd=self.xml_dsd,
        dsd_id=self.dsd_id,
        frequencies_supported=FREQUENCIES_SUPPORTED,
    )

    dimension_keys, dimensions = self._get_dimensions_from_dsd()
    position, _key, dimension_values = select_dimension(dimension_keys, dimensions)
    count_dimensions = len(dimension_keys)

    for dimension_value in dimension_values:
        key = get_key_for_dimension(count_dimensions, position, dimension_value)

        # e.g. http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
        url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)
        if not self._is_good_url(url, good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
            print("bypass url[%s]" % url)
            continue

        headers = SDMX_DATA_HEADERS

        filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
        download = Downloader(
            url=url,
            filename=filename,
            store_filepath=self.store_path,
            headers=headers,
            use_existing_file=self.fetcher.use_existing_file,
            # client=self.fetcher.requests_client
        )
        filepath, response = download.get_filepath_and_response()

        if filepath and os.path.exists(filepath):
            self.fetcher.for_delete.append(filepath)
        else:
            # No usable file on disk: skip this dimension value.
            continue

        if response:
            self._add_url_cache(url, response.status_code)
            # BUG FIX: these checks previously sat in an `elif response and ...`
            # chain behind `if response:` and were unreachable dead code.
            if response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            if response.status_code >= 400:
                # BUG FIX: raise_for_status() raises on its own; re-raising
                # its None return value was a TypeError.
                response.raise_for_status()

        for row, err in self.xml_data.process(filepath):
            yield row, err

    yield None, None
def _load(self):
    """Fetch dataflow + DSD metadata, populate dataset structures, then
    download the full data file and prepare the row iterator."""

    # Step 1: dataflow document -> discover the DSD id.
    dataflow_url = "http://sdw-wsrest.ecb.int/service/dataflow/ECB/%s" % self.dataset_code
    dataflow_dl = Downloader(url=dataflow_url,
                             filename="dataflow-%s.xml" % self.dataset_code,
                             headers=SDMX_METADATA_HEADERS)
    self.xml_dsd.process(dataflow_dl.get_filepath())

    self.dsd_id = self.xml_dsd.dsd_id
    if not self.dsd_id:
        msg = "DSD ID not found for provider[%s] - dataset[%s]" % (self.provider_name,
                                                                   self.dataset_code)
        raise Exception(msg)

    # Step 2: data structure definition (with children references).
    dsd_url = "http://sdw-wsrest.ecb.int/service/datastructure/ECB/%s?references=children" % self.dsd_id
    dsd_dl = Downloader(url=dsd_url,
                        filename="dsd-%s.xml" % self.dataset_code,
                        headers=SDMX_METADATA_HEADERS)
    self.xml_dsd.process(dsd_dl.get_filepath())

    self.dataset.name = self.xml_dsd.dataset_name

    # Copy dimension / attribute code lists onto the dataset, preserving order.
    self.dimension_list.set_dict(OrderedDict(
        (name, entry["dimensions"])
        for name, entry in self.xml_dsd.dimensions.items()))

    self.attribute_list.set_dict(OrderedDict(
        (name, entry["values"])
        for name, entry in self.xml_dsd.attributes.items()))

    # Step 3: full data download.
    data_url = "http://sdw-wsrest.ecb.int/service/data/%s" % self.dataset_code
    data_dl = Downloader(url=data_url,
                         filename="data-%s.xml" % self.dataset_code,
                         headers=SDMX_DATA_HEADERS)

    self.xml_data = XMLData(provider_name=self.provider_name,
                            dataset_code=self.dataset_code,
                            dimension_keys=self.xml_dsd.dimension_keys)

    #TODO: response and exception
    try:
        filepath, response = data_dl.get_filepath_and_response()
    except requests.exceptions.HTTPError as err:
        logger.critical("AUTRE ERREUR HTTP : %s" % err.response.status_code)
        raise

    self.rows = self.xml_data.process(filepath)
class ECB_Data(object):
    """Iterator over the series of one ECB dataset (SDMX 2.1)."""

    def __init__(self, dataset=None):
        """
        :param Datasets dataset: Datasets instance
        """
        self.dataset = dataset
        self.attribute_list = self.dataset.attribute_list
        self.dimension_list = self.dataset.dimension_list
        self.provider_name = self.dataset.provider_name
        self.dataset_code = self.dataset.dataset_code

        self.xml_dsd = XMLStructure_2_1(provider_name=self.provider_name,
                                        dataset_code=self.dataset_code)

        self.rows = None
        self.dsd_id = None

        self._load()

    def _load(self):
        """Fetch dataflow + DSD metadata, fill dataset structures, then
        download the data file and prepare the row iterator."""

        # Dataflow document -> DSD id discovery.
        dataflow_url = "http://sdw-wsrest.ecb.int/service/dataflow/ECB/%s" % self.dataset_code
        dataflow_dl = Downloader(url=dataflow_url,
                                 filename="dataflow-%s.xml" % self.dataset_code,
                                 headers=SDMX_METADATA_HEADERS)
        self.xml_dsd.process(dataflow_dl.get_filepath())

        self.dsd_id = self.xml_dsd.dsd_id
        if not self.dsd_id:
            msg = "DSD ID not found for provider[%s] - dataset[%s]" % (self.provider_name,
                                                                       self.dataset_code)
            raise Exception(msg)

        # Data structure definition (with children references).
        dsd_url = "http://sdw-wsrest.ecb.int/service/datastructure/ECB/%s?references=children" % self.dsd_id
        dsd_dl = Downloader(url=dsd_url,
                            filename="dsd-%s.xml" % self.dataset_code,
                            headers=SDMX_METADATA_HEADERS)
        self.xml_dsd.process(dsd_dl.get_filepath())

        self.dataset.name = self.xml_dsd.dataset_name

        # Propagate dimension / attribute code lists, preserving order.
        self.dimension_list.set_dict(OrderedDict(
            (name, entry["dimensions"])
            for name, entry in self.xml_dsd.dimensions.items()))

        self.attribute_list.set_dict(OrderedDict(
            (name, entry["values"])
            for name, entry in self.xml_dsd.attributes.items()))

        # Full data download.
        data_url = "http://sdw-wsrest.ecb.int/service/data/%s" % self.dataset_code
        data_dl = Downloader(url=data_url,
                             filename="data-%s.xml" % self.dataset_code,
                             headers=SDMX_DATA_HEADERS)

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                dimension_keys=self.xml_dsd.dimension_keys)

        #TODO: response and exception
        try:
            filepath, response = data_dl.get_filepath_and_response()
        except requests.exceptions.HTTPError as err:
            logger.critical("AUTRE ERREUR HTTP : %s" % err.response.status_code)
            raise

        self.rows = self.xml_data.process(filepath)

    def __next__(self):
        """Return the next series; stop when the source yields a falsy row."""
        row = next(self.rows)
        if row:
            return self.build_series(row)
        raise StopIteration()

    def build_series(self, bson):
        """Stamp the series with the dataset's last-update date."""
        bson["last_update"] = self.dataset.last_update
        return bson
def _get_data_by_dimension(self):
    """Yield (series, error) tuples, one conditional SDMX data request per
    value of the selected dimension.

    Uses ``If-Modified-Since`` when a ``Last-Modified`` value is stored in
    the dataset metadata, and records the server's ``Last-Modified`` header
    after each successful request.  Ends with a ``(None, None)`` sentinel.
    """
    self.xml_data = XMLData(provider_name=self.provider_name,
                            dataset_code=self.dataset_code,
                            xml_dsd=self.xml_dsd,
                            frequencies_supported=FREQUENCIES_SUPPORTED)

    dimension_keys, dimensions = get_dimensions_from_dsd(self.xml_dsd,
                                                         self.provider_name,
                                                         self.dataset_code)

    position, _key, dimension_values = select_dimension(dimension_keys, dimensions)

    count_dimensions = len(dimension_keys)

    for dimension_value in dimension_values:
        # Wildcard key: the concrete value at `position`, "." elsewhere.
        sdmx_key = []
        for i in range(count_dimensions):
            if i == position:
                sdmx_key.append(dimension_value)
            else:
                sdmx_key.append(".")
        key = "".join(sdmx_key)

        url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)

        # BUG FIX: copy the shared module-level header dict before mutating.
        # The original aliased SDMX_DATA_HEADERS, so If-Modified-Since leaked
        # into every subsequent request (even once metadata was cleared).
        headers = dict(SDMX_DATA_HEADERS)

        last_modified = None
        if self.dataset.metadata and "Last-Modified" in self.dataset.metadata:
            headers["If-Modified-Since"] = self.dataset.metadata["Last-Modified"]
            last_modified = self.dataset.metadata["Last-Modified"]

        filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
        download = Downloader(url=url,
                              filename=filename,
                              store_filepath=self.store_path,
                              headers=headers,
                              client=self.fetcher.requests_client)
        filepath, response = download.get_filepath_and_response()

        if filepath:
            self.fetcher.for_delete.append(filepath)

        if response.status_code == HTTP_ERROR_NOT_MODIFIED:
            msg = "Reject dataset updated for provider[%s] - dataset[%s] - update-date[%s]"
            logger.warning(msg % (self.provider_name, self.dataset_code, last_modified))
            continue
        elif response.status_code == HTTP_ERROR_NO_RESULT:
            continue
        elif response.status_code >= 400:
            # BUG FIX: raise_for_status() raises HTTPError itself; re-raising
            # its None return value (`raise response.raise_for_status()`)
            # was a TypeError.
            response.raise_for_status()

        if "Last-Modified" in response.headers:
            if not self.dataset.metadata:
                self.dataset.metadata = {}
            self.dataset.metadata["Last-Modified"] = response.headers["Last-Modified"]

        for row, err in self.xml_data.process(filepath):
            yield row, err

        #self.dataset.update_database(save_only=True)

    yield None, None
class ECB_Data(SeriesIterator):
    """SeriesIterator over one ECB dataset, fetched dimension-by-dimension."""

    def __init__(self, dataset):
        """
        :param Datasets dataset: Datasets instance
        """
        super().__init__(dataset)
        self.store_path = self.get_store_path()

        # Dataflow metadata is preloaded by the fetcher.
        self.dataset.name = self.fetcher._dataflows[self.dataset_code]["name"]
        self.dsd_id = self.fetcher._dataflows[self.dataset_code]["dsd_id"]

        self.xml_dsd = XMLStructure(provider_name=self.provider_name)
        self.xml_dsd.concepts = self.fetcher._concepts

        self._load()
        self.rows = self._get_data_by_dimension()

    def _load(self):
        """Download and parse the DSD, then fill the dataset structures."""
        url = "http://sdw-wsrest.ecb.int/service/datastructure/ECB/%s?references=all" % self.dsd_id
        download = utils.Downloader(store_filepath=self.store_path,
                                    url=url,
                                    filename="dsd-%s.xml" % self.dataset_code,
                                    headers=SDMX_METADATA_HEADERS,
                                    use_existing_file=self.fetcher.use_existing_file)
        filepath = download.get_filepath()
        self.fetcher.for_delete.append(filepath)
        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _get_data_by_dimension(self):
        """Yield (series, error) tuples, one conditional SDMX data request
        per value of the selected dimension.  Ends with (None, None)."""
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dimension_keys, dimensions = get_dimensions_from_dsd(self.xml_dsd,
                                                             self.provider_name,
                                                             self.dataset_code)

        position, _key, dimension_values = select_dimension(dimension_keys, dimensions)

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:
            # Wildcard key: the concrete value at `position`, "." elsewhere.
            sdmx_key = []
            for i in range(count_dimensions):
                if i == position:
                    sdmx_key.append(dimension_value)
                else:
                    sdmx_key.append(".")
            key = "".join(sdmx_key)

            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)

            # BUG FIX: copy the shared module-level header dict before
            # mutating; the original aliased SDMX_DATA_HEADERS, so
            # If-Modified-Since leaked into every subsequent request.
            headers = dict(SDMX_DATA_HEADERS)

            last_modified = None
            if self.dataset.metadata and "Last-Modified" in self.dataset.metadata:
                headers["If-Modified-Since"] = self.dataset.metadata["Last-Modified"]
                last_modified = self.dataset.metadata["Last-Modified"]

            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  headers=headers,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            if response.status_code == HTTP_ERROR_NOT_MODIFIED:
                msg = "Reject dataset updated for provider[%s] - dataset[%s] - update-date[%s]"
                logger.warning(msg % (self.provider_name, self.dataset_code, last_modified))
                continue
            elif response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            elif response.status_code >= 400:
                # BUG FIX: raise_for_status() raises HTTPError itself;
                # `raise response.raise_for_status()` raised None (TypeError).
                response.raise_for_status()

            if "Last-Modified" in response.headers:
                if not self.dataset.metadata:
                    self.dataset.metadata = {}
                self.dataset.metadata["Last-Modified"] = response.headers["Last-Modified"]

            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)

        yield None, None

    def _set_dataset(self):
        """Copy structural metadata from the parsed DSD onto the dataset."""
        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"]
        self.dataset.attribute_keys = dataset["attribute_keys"]
        self.dataset.concepts = dataset["concepts"]
        self.dataset.codelists = dataset["codelists"]

    def clean_field(self, bson):
        """Apply generic cleaning, then drop redundant title attributes."""
        bson = super().clean_field(bson)
        bson["attributes"].pop("TITLE", None)
        bson["attributes"].pop("TITLE_COMPL", None)
        return bson

    def build_series(self, bson):
        """Register the series frequency and stamp the last-update date."""
        self.dataset.add_frequency(bson["frequency"])
        bson["last_update"] = self.dataset.last_update
        return bson
class ECB_Data(SeriesIterator):
    """SeriesIterator over one ECB dataset, fetched dimension-by-dimension
    with a URL result cache."""

    def __init__(self, dataset):
        """
        :param Datasets dataset: Datasets instance
        """
        super().__init__(dataset)
        self.store_path = self.get_store_path()
        self.last_modified = None

        # Dataflow metadata is preloaded by the fetcher.
        self.dataset.name = self.fetcher._dataflows[self.dataset_code]["name"]
        self.dsd_id = self.fetcher._dataflows[self.dataset_code]["dsd_id"]
        self.agency_id = self.fetcher._dataflows[self.dataset_code]["attrs"].get("agencyID")

        self.xml_dsd = XMLStructure(provider_name=self.provider_name)
        # self.xml_dsd.concepts = self.fetcher._concepts

        self._load()
        self.rows = self._get_data_by_dimension()

    def _load(self):
        """Download and parse the DSD, then fill the dataset structures."""
        url = "http://sdw-wsrest.ecb.int/service/datastructure/%s/%s?references=all" % (
            self.agency_id, self.dsd_id)
        download = utils.Downloader(
            store_filepath=self.store_path,
            url=url,
            filename="dsd-%s.xml" % self.dataset_code,
            headers=SDMX_METADATA_HEADERS,
            use_existing_file=self.fetcher.use_existing_file,
        )
        filepath = download.get_filepath()
        self.fetcher.for_delete.append(filepath)
        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _get_dimensions_from_dsd(self):
        """Return (dimension_keys, dimensions) extracted from the DSD."""
        return get_dimensions_from_dsd(self.xml_dsd, self.provider_name, self.dataset_code)

    def _get_data_by_dimension(self):
        """Yield (series, error) tuples, one SDMX data request per value of
        the selected dimension.  Ends with a (None, None) sentinel."""
        self.xml_data = XMLData(
            provider_name=self.provider_name,
            dataset_code=self.dataset_code,
            xml_dsd=self.xml_dsd,
            dsd_id=self.dsd_id,
            frequencies_supported=FREQUENCIES_SUPPORTED,
        )

        dimension_keys, dimensions = self._get_dimensions_from_dsd()
        position, _key, dimension_values = select_dimension(dimension_keys, dimensions)
        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:
            key = get_key_for_dimension(count_dimensions, position, dimension_value)

            # e.g. http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)
            if not self._is_good_url(url, good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
                print("bypass url[%s]" % url)
                continue

            headers = SDMX_DATA_HEADERS

            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(
                url=url,
                filename=filename,
                store_filepath=self.store_path,
                headers=headers,
                use_existing_file=self.fetcher.use_existing_file,
                # client=self.fetcher.requests_client
            )
            filepath, response = download.get_filepath_and_response()

            if filepath and os.path.exists(filepath):
                self.fetcher.for_delete.append(filepath)
            else:
                # No usable file on disk: skip this dimension value.
                continue

            if response:
                self._add_url_cache(url, response.status_code)
                # BUG FIX: these checks previously sat in an
                # `elif response and ...` chain behind `if response:` and
                # were unreachable dead code.
                if response.status_code == HTTP_ERROR_NO_RESULT:
                    continue
                if response.status_code >= 400:
                    # BUG FIX: raise_for_status() raises HTTPError itself;
                    # re-raising its None return value was a TypeError.
                    response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

        yield None, None

    def _set_dataset(self):
        """Copy structural metadata from the parsed DSD onto the dataset."""
        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"]
        self.dataset.attribute_keys = dataset["attribute_keys"]
        self.dataset.concepts = dataset["concepts"]
        self.dataset.codelists = dataset["codelists"]

    def clean_field(self, bson):
        """Drop redundant title attributes before the generic cleaning."""
        bson["attributes"].pop("TITLE", None)
        bson["attributes"].pop("TITLE_COMPL", None)
        bson = super().clean_field(bson)
        return bson

    def build_series(self, bson):
        """Register the series frequency and stamp the last-update date."""
        self.dataset.add_frequency(bson["frequency"])
        bson["last_update"] = self.dataset.last_update
        return bson
class ECB_Data(SeriesIterator):
    """SeriesIterator over one ECB dataset, fetched dimension-by-dimension
    with a URL result cache."""

    def __init__(self, dataset):
        """
        :param Datasets dataset: Datasets instance
        """
        super().__init__(dataset)
        self.store_path = self.get_store_path()
        self.last_modified = None

        # Dataflow metadata is preloaded by the fetcher.
        self.dataset.name = self.fetcher._dataflows[self.dataset_code]["name"]
        self.dsd_id = self.fetcher._dataflows[self.dataset_code]["dsd_id"]
        self.agency_id = self.fetcher._dataflows[
            self.dataset_code]["attrs"].get("agencyID")

        self.xml_dsd = XMLStructure(provider_name=self.provider_name)
        #self.xml_dsd.concepts = self.fetcher._concepts

        self._load()
        self.rows = self._get_data_by_dimension()

    def _load(self):
        """Download and parse the DSD, then fill the dataset structures."""
        url = "http://sdw-wsrest.ecb.int/service/datastructure/%s/%s?references=all" % (
            self.agency_id, self.dsd_id)
        download = utils.Downloader(
            store_filepath=self.store_path,
            url=url,
            filename="dsd-%s.xml" % self.dataset_code,
            headers=SDMX_METADATA_HEADERS,
            use_existing_file=self.fetcher.use_existing_file)
        filepath = download.get_filepath()
        self.fetcher.for_delete.append(filepath)
        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _get_dimensions_from_dsd(self):
        """Return (dimension_keys, dimensions) extracted from the DSD."""
        return get_dimensions_from_dsd(self.xml_dsd,
                                       self.provider_name,
                                       self.dataset_code)

    def _get_data_by_dimension(self):
        """Yield (series, error) tuples, one SDMX data request per value of
        the selected dimension.  Ends with a (None, None) sentinel."""
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dsd_id,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(
            dimension_keys, dimensions)

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:
            key = get_key_for_dimension(count_dimensions, position,
                                        dimension_value)

            #http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (
                self.dataset_code, key)
            if not self._is_good_url(
                    url, good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
                print("bypass url[%s]" % url)
                continue

            headers = SDMX_DATA_HEADERS

            filename = "data-%s-%s.xml" % (self.dataset_code,
                                           key.replace(".", "_"))
            download = Downloader(
                url=url,
                filename=filename,
                store_filepath=self.store_path,
                headers=headers,
                use_existing_file=self.fetcher.use_existing_file,
                #client=self.fetcher.requests_client
            )
            filepath, response = download.get_filepath_and_response()

            if filepath and os.path.exists(filepath):
                self.fetcher.for_delete.append(filepath)
            else:
                # No usable file on disk: skip this dimension value.
                continue

            if response:
                self._add_url_cache(url, response.status_code)
                # BUG FIX: these checks previously sat in an
                # `elif response and ...` chain behind `if response:` and
                # were unreachable dead code.
                if response.status_code == HTTP_ERROR_NO_RESULT:
                    continue
                if response.status_code >= 400:
                    # BUG FIX: raise_for_status() raises HTTPError itself;
                    # re-raising its None return value was a TypeError.
                    response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

        yield None, None

    def _set_dataset(self):
        """Copy structural metadata from the parsed DSD onto the dataset."""
        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"]
        self.dataset.attribute_keys = dataset["attribute_keys"]
        self.dataset.concepts = dataset["concepts"]
        self.dataset.codelists = dataset["codelists"]

    def clean_field(self, bson):
        """Drop redundant title attributes before the generic cleaning."""
        bson["attributes"].pop("TITLE", None)
        bson["attributes"].pop("TITLE_COMPL", None)
        bson = super().clean_field(bson)
        return bson

    def build_series(self, bson):
        """Register the series frequency and stamp the last-update date."""
        self.dataset.add_frequency(bson["frequency"])
        bson["last_update"] = self.dataset.last_update
        return bson