class IMF_XML_Data(SeriesIterator):
    """SeriesIterator for IMF SDMX-ML 2.0 data.

    Loads the dataset's DSD first, then streams series slice by slice
    along the dimension with the most values.
    """

    def __init__(self, dataset=None):
        super().__init__(dataset)
        self.store_path = self.get_store_path()
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)

        self._load_dsd()

        # Reject the run if the stored dataset is already newer than the DSD.
        if self.dataset.last_update and self.xml_dsd.last_update:
            if self.dataset.last_update > self.xml_dsd.last_update:
                comments = "update-date[%s]" % self.xml_dsd.last_update
                raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                                  dataset_code=self.dataset.dataset_code,
                                                  comments=comments)

        self.dataset.last_update = clean_datetime(self.xml_dsd.last_update)

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        self.rows = self._get_data_by_dimension()

    def _get_url_dsd(self):
        return "http://dataservices.imf.org/REST/SDMX_XML.svc/DataStructure/%s" % self.dataset_code

    def _get_url_data(self):
        return "http://dataservices.imf.org/REST/SDMX_XML.svc/CompactData/%s" % self.dataset_code

    def _load_dsd(self):
        url = self._get_url_dsd()
        download = Downloader(store_filepath=self.store_path,
                              url=url,
                              filename="dsd-%s.xml" % self.dataset_code,
                              use_existing_file=self.fetcher.use_existing_file,
                              client=self.fetcher.requests_client)
        filepath = download.get_filepath()
        self.fetcher.for_delete.append(filepath)
        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _set_dataset(self):
        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"]
        self.dataset.attribute_keys = dataset["attribute_keys"]
        self.dataset.concepts = dataset["concepts"]
        self.dataset.codelists = dataset["codelists"]

    def _get_dimensions_from_dsd(self):
        return get_dimensions_from_dsd(self.xml_dsd,
                                       self.provider_name,
                                       self.dataset_code)

    def _get_data_by_dimension(self):
        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        # Slice along the dimension with the most values so each request
        # stays small enough for the provider.
        position, _key, dimension_values = select_dimension(dimension_keys,
                                                            dimensions,
                                                            choice="max")

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:
            # For each value of the selected dimension, build an SDMX URL
            # key, e.g. ".A." with wildcards for every other dimension.
            local_count = 0

            sdmx_key = []
            for i in range(count_dimensions):
                if i == position:
                    sdmx_key.append(dimension_value)
                else:
                    sdmx_key.append(".")
            key = "".join(sdmx_key)

            url = "%s/%s" % (self._get_url_data(), key)
            filename = "data-%s-%s.xml" % (self.dataset_code,
                                           key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            # A 4xx answer means this slice is unavailable: skip it.
            # A 5xx answer aborts the whole run.
            if 400 <= response.status_code < 500:
                continue
            elif response.status_code >= 500:
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err
                local_count += 1

            # Flag slices that may hit the provider's response size limit.
            if local_count >= 2999:
                logger.warning("TODO: VRFY - series > 2999 for provider[IMF] - dataset[%s] - key[%s]" % (self.dataset_code, key))

            #self.dataset.update_database(save_only=True)

        yield None, None

    def build_series(self, bson):
        bson["last_update"] = self.dataset.last_update
        self.dataset.add_frequency(bson["frequency"])
        return bson
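
# --- Illustrative usage (not part of the original module) ---
# A minimal sketch of how a dlstats fetcher might consume a
# SeriesIterator such as IMF_XML_Data. The `fetcher`, `Datasets`, and
# dataset-code wiring below are assumptions for illustration, not taken
# from this file; the point is the iteration contract: `rows` yields
# (bson, err) pairs and signals exhaustion with (None, None).
#
#   dataset = Datasets(provider_name="IMF",
#                      dataset_code="PGI",       # hypothetical dataset code
#                      fetcher=fetcher)          # hypothetical fetcher instance
#   it = IMF_XML_Data(dataset=dataset)
#   for bson, err in it.rows:
#       if bson is None and err is None:         # end-of-stream sentinel
#           break
#       if err:
#           continue                             # skip rows that failed to parse
#       series = it.build_series(bson)           # stamps last_update, frequency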