Example #1
0
    def __init__(self, dataset=None):
        """Prepare the DSD and the data row generator for one dataset.

        Loads the datastructure (DSD), rejects the dataset when the stored
        copy is already newer than the remote DSD, then builds the SDMX
        reader and the lazy ``rows`` generator.
        """
        super().__init__(dataset)

        self.store_path = self.get_store_path()
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)

        self._load_dsd()

        # Skip datasets whose stored copy is more recent than the DSD.
        if (self.dataset.last_update and self.xml_dsd.last_update
                and self.dataset.last_update > self.xml_dsd.last_update):
            comments = "update-date[%s]" % self.xml_dsd.last_update
            raise errors.RejectUpdatedDataset(
                provider_name=self.provider_name,
                dataset_code=self.dataset.dataset_code,
                comments=comments)

        self.dataset.last_update = clean_datetime(self.xml_dsd.last_update)

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        self.rows = self._get_data_by_dimension()
Example #2
0
    def __init__(self, dataset=None):
        """Initialize: load the DSD, check staleness, build the XML reader.

        Raises ``errors.RejectUpdatedDataset`` when the dataset stored
        locally is already newer than the remote datastructure.
        """
        super().__init__(dataset)

        self.store_path = self.get_store_path()
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)

        self._load_dsd()

        # Compare the stored update date against the remote DSD's date.
        local_update = self.dataset.last_update
        remote_update = self.xml_dsd.last_update
        if local_update and remote_update and local_update > remote_update:
            raise errors.RejectUpdatedDataset(
                provider_name=self.provider_name,
                dataset_code=self.dataset.dataset_code,
                comments="update-date[%s]" % remote_update)

        self.dataset.last_update = clean_datetime(remote_update)

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        self.rows = self._get_data_by_dimension()
Example #3
0
class IMF_XML_Data(SeriesIterator):
    """Series iterator for IMF SDMX-XML datasets.

    Downloads the datastructure (DSD) for ``dataset_code``, then streams
    series rows by issuing one CompactData request per value of the
    largest dimension (see :meth:`_get_data_by_dimension`).
    """

    def __init__(self, dataset=None):
        """Load the DSD, verify freshness and build the data generator.

        Raises:
            errors.RejectUpdatedDataset: when the stored dataset is
                already newer than the remote DSD.
        """
        super().__init__(dataset)

        self.store_path = self.get_store_path()
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)

        self._load_dsd()

        if self.dataset.last_update and self.xml_dsd.last_update:

            # Reject when our stored copy is more recent than the DSD.
            if self.dataset.last_update > self.xml_dsd.last_update:
                comments = "update-date[%s]" % self.xml_dsd.last_update
                raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                                  dataset_code=self.dataset.dataset_code,
                                                  comments=comments)

        self.dataset.last_update = clean_datetime(self.xml_dsd.last_update)

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        self.rows = self._get_data_by_dimension()

    def _get_url_dsd(self):
        """Return the DataStructure (DSD) URL for this dataset."""
        return "http://dataservices.imf.org/REST/SDMX_XML.svc/DataStructure/%s" % self.dataset_code

    def _get_url_data(self):
        """Return the CompactData base URL for this dataset."""
        return "http://dataservices.imf.org/REST/SDMX_XML.svc/CompactData/%s" % self.dataset_code

    def _load_dsd(self):
        """Download and parse the DSD, then populate dataset metadata."""
        url = self._get_url_dsd()
        download = Downloader(store_filepath=self.store_path,
                              url=url,
                              filename="dsd-%s.xml" % self.dataset_code,
                              use_existing_file=self.fetcher.use_existing_file,
                              client=self.fetcher.requests_client)
        filepath = download.get_filepath()
        # Register the temp file for cleanup by the fetcher.
        self.fetcher.for_delete.append(filepath)

        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _set_dataset(self):
        """Copy DSD-derived structure (keys, concepts, codelists) onto the dataset."""
        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"]
        self.dataset.attribute_keys = dataset["attribute_keys"]
        self.dataset.concepts = dataset["concepts"]
        self.dataset.codelists = dataset["codelists"]

    def _get_dimensions_from_dsd(self):
        """Return ``(dimension_keys, dimensions)`` extracted from the DSD."""
        return get_dimensions_from_dsd(self.xml_dsd, self.provider_name, self.dataset_code)

    def _get_data_by_dimension(self):
        """Yield ``(row, err)`` pairs, one request per value of the widest dimension.

        Splitting requests on the dimension with the most values keeps
        each response small. Terminates with a ``(None, None)`` sentinel.
        """
        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(dimension_keys, dimensions, choice="max")

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:
            # Build one SDMX URL key per value of the selected dimension,
            # wildcarding ('.') every other position.
            local_count = 0

            sdmx_key = []
            for i in range(count_dimensions):
                if i == position:
                    sdmx_key.append(dimension_value)
                else:
                    sdmx_key.append(".")
            key = "".join(sdmx_key)

            url = "%s/%s" % (self._get_url_data(), key)
            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            if 400 <= response.status_code < 500:
                # Client error (e.g. no data for this key): skip this slice.
                continue
            elif response.status_code >= 500:
                # BUGFIX: was `raise response.raise_for_status()` —
                # raise_for_status() raises HTTPError itself (and returns
                # None otherwise), so the outer `raise` was wrong.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err
                local_count += 1

            if local_count >= 2999:
                logger.warning("TODO: VRFY - series > 2999 for provider[IMF] - dataset[%s] - key[%s]" % (self.dataset_code, key))

            #self.dataset.update_database(save_only=True)

        yield None, None

    def build_series(self, bson):
        """Stamp the dataset's last_update on a series and record its frequency."""
        bson["last_update"] = self.dataset.last_update
        self.dataset.add_frequency(bson["frequency"])
        return bson
Example #4
0
class IMF_XML_Data(SeriesIterator):
    """Iterate series of an IMF SDMX-XML dataset.

    Fetches the DSD once, then streams CompactData responses — one HTTP
    request per value of the dimension with the most members.
    """

    def __init__(self, dataset=None):
        """Load the DSD, reject stale runs, and prepare the row generator.

        Raises:
            errors.RejectUpdatedDataset: if the locally stored dataset
                is newer than the remote DSD's update date.
        """
        super().__init__(dataset)

        self.store_path = self.get_store_path()
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)

        self._load_dsd()

        if self.dataset.last_update and self.xml_dsd.last_update:

            # Nothing new upstream: refuse to re-process this dataset.
            if self.dataset.last_update > self.xml_dsd.last_update:
                comments = "update-date[%s]" % self.xml_dsd.last_update
                raise errors.RejectUpdatedDataset(
                    provider_name=self.provider_name,
                    dataset_code=self.dataset.dataset_code,
                    comments=comments)

        self.dataset.last_update = clean_datetime(self.xml_dsd.last_update)

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        self.rows = self._get_data_by_dimension()

    def _get_url_dsd(self):
        """URL of the DataStructure (DSD) document for this dataset."""
        return "http://dataservices.imf.org/REST/SDMX_XML.svc/DataStructure/%s" % self.dataset_code

    def _get_url_data(self):
        """Base URL of the CompactData endpoint for this dataset."""
        return "http://dataservices.imf.org/REST/SDMX_XML.svc/CompactData/%s" % self.dataset_code

    def _load_dsd(self):
        """Download the DSD file, parse it, and update dataset metadata."""
        url = self._get_url_dsd()
        download = Downloader(store_filepath=self.store_path,
                              url=url,
                              filename="dsd-%s.xml" % self.dataset_code,
                              use_existing_file=self.fetcher.use_existing_file,
                              client=self.fetcher.requests_client)
        filepath = download.get_filepath()
        # Queue the downloaded file for deletion after processing.
        self.fetcher.for_delete.append(filepath)

        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _set_dataset(self):
        """Transfer structure parsed from the DSD onto the dataset object."""
        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"]
        self.dataset.attribute_keys = dataset["attribute_keys"]
        self.dataset.concepts = dataset["concepts"]
        self.dataset.codelists = dataset["codelists"]

    def _get_dimensions_from_dsd(self):
        """Return ``(dimension_keys, dimensions)`` for this dataset's DSD."""
        return get_dimensions_from_dsd(self.xml_dsd, self.provider_name,
                                       self.dataset_code)

    def _get_data_by_dimension(self):
        """Generate ``(row, err)`` pairs; ends with a ``(None, None)`` sentinel.

        One request is issued per value of the widest dimension; the
        other dimension positions are wildcarded with ``'.'``.
        """
        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(dimension_keys,
                                                            dimensions,
                                                            choice="max")

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:
            # Build the SDMX key for this slice of the dataset.
            local_count = 0

            sdmx_key = []
            for i in range(count_dimensions):
                if i == position:
                    sdmx_key.append(dimension_value)
                else:
                    sdmx_key.append(".")
            key = "".join(sdmx_key)

            url = "%s/%s" % (self._get_url_data(), key)
            filename = "data-%s-%s.xml" % (self.dataset_code,
                                           key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            if 400 <= response.status_code < 500:
                # 4xx (e.g. empty slice): skip this key and continue.
                continue
            elif response.status_code >= 500:
                # BUGFIX: previously `raise response.raise_for_status()`.
                # raise_for_status() raises HTTPError on its own and
                # returns None otherwise — the extra `raise` was invalid.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err
                local_count += 1

            if local_count >= 2999:
                logger.warning(
                    "TODO: VRFY - series > 2999 for provider[IMF] - dataset[%s] - key[%s]"
                    % (self.dataset_code, key))

            #self.dataset.update_database(save_only=True)

        yield None, None

    def build_series(self, bson):
        """Set the series' last_update and register its frequency on the dataset."""
        bson["last_update"] = self.dataset.last_update
        self.dataset.add_frequency(bson["frequency"])
        return bson