Code example #1
0
    def _get_data_by_dimension(self):
        """Yield (series, error) tuples for the whole dataset.

        The dataset is split along one dimension chosen by
        select_dimension(): one SDMX data file is downloaded per value of
        that dimension and each parsed series is yielded as (row, err).
        A final (None, None) pair signals exhaustion to the caller.
        """
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dsd_id,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(
            dimension_keys, dimensions)

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:

            # SDMX key with only the selected dimension set, e.g.
            # http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
            key = get_key_for_dimension(count_dimensions, position,
                                        dimension_value)

            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (
                self.dataset_code, key)
            if not self._is_good_url(
                    url, good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
                print("bypass url[%s]" % url)
                continue

            headers = SDMX_DATA_HEADERS

            filename = "data-%s-%s.xml" % (self.dataset_code,
                                           key.replace(".", "_"))
            download = Downloader(
                url=url,
                filename=filename,
                store_filepath=self.store_path,
                headers=headers,
                use_existing_file=self.fetcher.use_existing_file,
                #client=self.fetcher.requests_client
            )
            filepath, response = download.get_filepath_and_response()

            # Nothing downloaded (e.g. empty body): skip this slice.
            if not filepath or not os.path.exists(filepath):
                continue
            self.fetcher.for_delete.append(filepath)

            if response:
                self._add_url_cache(url, response.status_code)
                # BUGFIX: the two checks below were unreachable before —
                # they were written as 'elif response and ...' branches
                # behind 'if response:', which had already matched.
                if response.status_code == HTTP_ERROR_NO_RESULT:
                    continue
                if response.status_code >= 400:
                    # raise_for_status() raises HTTPError itself; wrapping
                    # it in 'raise' would try to 'raise None'.
                    response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

        yield None, None
Code example #2
0
File: ecb.py  Project: srault95/dlstats
    def _get_data_by_dimension(self):
        """Yield (series, error) tuples for the whole dataset.

        Splits the dataset along one dimension chosen by select_dimension(),
        downloads one SDMX data file per value of that dimension, and streams
        each parsed series as (row, err). Ends with a (None, None) sentinel.
        """
        self.xml_data = XMLData(
            provider_name=self.provider_name,
            dataset_code=self.dataset_code,
            xml_dsd=self.xml_dsd,
            dsd_id=self.dsd_id,
            frequencies_supported=FREQUENCIES_SUPPORTED,
        )

        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(dimension_keys, dimensions)

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:

            key = get_key_for_dimension(count_dimensions, position, dimension_value)

            # http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)
            if not self._is_good_url(url, good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
                print("bypass url[%s]" % url)
                continue

            headers = SDMX_DATA_HEADERS

            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(
                url=url,
                filename=filename,
                store_filepath=self.store_path,
                headers=headers,
                use_existing_file=self.fetcher.use_existing_file,
                # client=self.fetcher.requests_client
            )
            filepath, response = download.get_filepath_and_response()

            # No file on disk (e.g. empty response): skip this slice.
            if not filepath or not os.path.exists(filepath):
                continue
            self.fetcher.for_delete.append(filepath)

            if response:
                self._add_url_cache(url, response.status_code)
                # BUGFIX: these checks were dead 'elif response and ...'
                # branches behind 'if response:' and could never run.
                if response.status_code == HTTP_ERROR_NO_RESULT:
                    continue
                if response.status_code >= 400:
                    # raise_for_status() raises HTTPError itself; do not
                    # wrap it in 'raise' (that would raise None).
                    response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

        yield None, None
Code example #3
0
File: ecb.py  Project: MichelJuillard/dlstats
    def _load(self):
        """Download and parse the ECB dataflow, DSD and data documents.

        Populates self.dsd_id, self.dataset.name, the dimension/attribute
        lists and self.rows (an iterator over the parsed series).

        Raises:
            Exception: if no DSD id can be extracted from the dataflow.
            requests.exceptions.HTTPError: re-raised after logging when the
                data download fails with an HTTP error.
        """
        # Step 1 - dataflow document: yields the DSD id for this dataset.
        url = "http://sdw-wsrest.ecb.int/service/dataflow/ECB/%s" % self.dataset_code
        download = Downloader(url=url, 
                              filename="dataflow-%s.xml" % self.dataset_code,
                              headers=SDMX_METADATA_HEADERS)
        
        self.xml_dsd.process(download.get_filepath())
        self.dsd_id = self.xml_dsd.dsd_id
        
        if not self.dsd_id:
            msg = "DSD ID not found for provider[%s] - dataset[%s]" % (self.provider_name, 
                                                                       self.dataset_code)
            raise Exception(msg)
        
        # Step 2 - data structure definition (with children references):
        # provides the dataset name, dimensions and attributes.
        url = "http://sdw-wsrest.ecb.int/service/datastructure/ECB/%s?references=children" % self.dsd_id
        download = Downloader(url=url, 
                              filename="dsd-%s.xml" % self.dataset_code,
                              headers=SDMX_METADATA_HEADERS)
        self.xml_dsd.process(download.get_filepath())
        
        self.dataset.name = self.xml_dsd.dataset_name
        
        # Copy the parsed dimension values into the dataset's code list.
        dimensions = OrderedDict()
        for key, item in self.xml_dsd.dimensions.items():
            dimensions[key] = item["dimensions"]
        self.dimension_list.set_dict(dimensions)
        
        # Same for attribute values.
        attributes = OrderedDict()
        for key, item in self.xml_dsd.attributes.items():
            attributes[key] = item["values"]
        self.attribute_list.set_dict(attributes)
        
        # Step 3 - the actual data document for the whole dataset.
        url = "http://sdw-wsrest.ecb.int/service/data/%s" % self.dataset_code
        download = Downloader(url=url, 
                              filename="data-%s.xml" % self.dataset_code,
                              headers=SDMX_DATA_HEADERS)

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                dimension_keys=self.xml_dsd.dimension_keys)
        
        
        #TODO: response and exception
        try:
            filepath, response = download.get_filepath_and_response()        
        except requests.exceptions.HTTPError as err:
            # Log the status code before propagating to the caller.
            logger.critical("AUTRE ERREUR HTTP : %s" % err.response.status_code)
            raise
            
        self.rows = self.xml_data.process(filepath)
Code example #4
0
File: ecb.py  Project: MichelJuillard/dlstats
class ECB_Data(object):
    """Iterator over the series of one ECB dataset.

    Downloads the dataflow, DSD and data documents at construction time,
    then yields one series dict per call to __next__().
    """
    
    def __init__(self, dataset=None):
        """
        :param Datasets dataset: Datasets instance
        """        
        self.dataset = dataset
        self.attribute_list = self.dataset.attribute_list
        self.dimension_list = self.dataset.dimension_list
        self.provider_name = self.dataset.provider_name
        self.dataset_code = self.dataset.dataset_code

        # SDMX 2.1 structure parser; filled by _load().
        self.xml_dsd = XMLStructure_2_1(provider_name=self.provider_name, 
                                        dataset_code=self.dataset_code)        
        
        # Iterator over parsed series, set by _load().
        self.rows = None
        # DSD id extracted from the dataflow document, set by _load().
        self.dsd_id = None
        
        self._load()
        
        
    def _load(self):
        """Download and parse the ECB dataflow, DSD and data documents.

        Populates self.dsd_id, self.dataset.name, the dimension/attribute
        lists and self.rows (an iterator over the parsed series).

        Raises:
            Exception: if no DSD id can be extracted from the dataflow.
            requests.exceptions.HTTPError: re-raised after logging when the
                data download fails with an HTTP error.
        """
        # Step 1 - dataflow document: yields the DSD id for this dataset.
        url = "http://sdw-wsrest.ecb.int/service/dataflow/ECB/%s" % self.dataset_code
        download = Downloader(url=url, 
                              filename="dataflow-%s.xml" % self.dataset_code,
                              headers=SDMX_METADATA_HEADERS)
        
        self.xml_dsd.process(download.get_filepath())
        self.dsd_id = self.xml_dsd.dsd_id
        
        if not self.dsd_id:
            msg = "DSD ID not found for provider[%s] - dataset[%s]" % (self.provider_name, 
                                                                       self.dataset_code)
            raise Exception(msg)
        
        # Step 2 - data structure definition (with children references):
        # provides the dataset name, dimensions and attributes.
        url = "http://sdw-wsrest.ecb.int/service/datastructure/ECB/%s?references=children" % self.dsd_id
        download = Downloader(url=url, 
                              filename="dsd-%s.xml" % self.dataset_code,
                              headers=SDMX_METADATA_HEADERS)
        self.xml_dsd.process(download.get_filepath())
        
        self.dataset.name = self.xml_dsd.dataset_name
        
        # Copy the parsed dimension values into the dataset's code list.
        dimensions = OrderedDict()
        for key, item in self.xml_dsd.dimensions.items():
            dimensions[key] = item["dimensions"]
        self.dimension_list.set_dict(dimensions)
        
        # Same for attribute values.
        attributes = OrderedDict()
        for key, item in self.xml_dsd.attributes.items():
            attributes[key] = item["values"]
        self.attribute_list.set_dict(attributes)
        
        # Step 3 - the actual data document for the whole dataset.
        url = "http://sdw-wsrest.ecb.int/service/data/%s" % self.dataset_code
        download = Downloader(url=url, 
                              filename="data-%s.xml" % self.dataset_code,
                              headers=SDMX_DATA_HEADERS)

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                dimension_keys=self.xml_dsd.dimension_keys)
        
        
        #TODO: response and exception
        try:
            filepath, response = download.get_filepath_and_response()        
        except requests.exceptions.HTTPError as err:
            # Log the status code before propagating to the caller.
            logger.critical("AUTRE ERREUR HTTP : %s" % err.response.status_code)
            raise
            
        self.rows = self.xml_data.process(filepath)


    def __next__(self):
        """Return the next series dict; raise StopIteration when exhausted."""
        _series = next(self.rows)
        
        # The underlying parser signals exhaustion with a falsy value.
        if not _series:
            raise StopIteration()
        
        return self.build_series(_series)

    def build_series(self, bson):
        """Stamp the dataset's last_update on the series dict and return it."""
        bson["last_update"] = self.dataset.last_update
        return bson
Code example #5
0
File: ecb.py  Project: ThomasRoca/dlstats
    def _get_data_by_dimension(self):
        """Yield (series, error) tuples, downloading one data file per value
        of the dimension selected by select_dimension().

        Uses HTTP conditional requests (If-Modified-Since / Last-Modified)
        to skip slices that did not change since the previous run. Ends with
        a (None, None) sentinel.
        """
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                frequencies_supported=FREQUENCIES_SUPPORTED)
        
        dimension_keys, dimensions = get_dimensions_from_dsd(self.xml_dsd,
                                                             self.provider_name,
                                                             self.dataset_code)
        
        position, _key, dimension_values = select_dimension(dimension_keys, dimensions)
        
        count_dimensions = len(dimension_keys)
        
        for dimension_value in dimension_values:

            # SDMX key with every dimension wildcarded ('.') except the one
            # at `position`, e.g. "A............".
            sdmx_key = []
            for i in range(count_dimensions):
                if i == position:
                    sdmx_key.append(dimension_value)
                else:
                    sdmx_key.append(".")
            key = "".join(sdmx_key)

            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)
            # BUGFIX: copy the shared module-level header dict before adding
            # the conditional header; previously If-Modified-Since was
            # written into SDMX_DATA_HEADERS itself and leaked into every
            # later request that reused it.
            headers = dict(SDMX_DATA_HEADERS)
            
            last_modified = None
            if self.dataset.metadata and "Last-Modified" in self.dataset.metadata:
                headers["If-Modified-Since"] = self.dataset.metadata["Last-Modified"]
                last_modified = self.dataset.metadata["Last-Modified"]
        
            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url, 
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  headers=headers,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            if response.status_code == HTTP_ERROR_NOT_MODIFIED:
                msg = "Reject dataset updated for provider[%s] - dataset[%s] - update-date[%s]"
                logger.warning(msg % (self.provider_name, self.dataset_code, last_modified))
                continue
            
            elif response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            
            elif response.status_code >= 400:
                # BUGFIX: raise_for_status() raises HTTPError itself;
                # 'raise response.raise_for_status()' would 'raise None'.
                response.raise_for_status()
    
            # Remember the server's Last-Modified for the next run's
            # conditional request.
            if "Last-Modified" in response.headers:
                if not self.dataset.metadata:
                    self.dataset.metadata = {}
                self.dataset.metadata["Last-Modified"] = response.headers["Last-Modified"]
            
            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)
        
        yield None, None
Code example #6
0
File: ecb.py  Project: ThomasRoca/dlstats
class ECB_Data(SeriesIterator):
    """Series iterator for one ECB dataset.

    Loads the data structure definition once at construction time, then
    streams the dataset's series slice by slice (one HTTP request per value
    of the selected dimension) via _get_data_by_dimension().
    """
    
    def __init__(self, dataset):
        """
        :param Datasets dataset: Datasets instance
        """
        super().__init__(dataset)
        self.store_path = self.get_store_path()

        # Name and DSD id come from the dataflows already fetched globally.
        self.dataset.name = self.fetcher._dataflows[self.dataset_code]["name"]        
        self.dsd_id = self.fetcher._dataflows[self.dataset_code]["dsd_id"]

        self.xml_dsd = XMLStructure(provider_name=self.provider_name)        
        self.xml_dsd.concepts = self.fetcher._concepts
        
        self._load()
        
        # Lazy generator over (series, error) tuples.
        self.rows = self._get_data_by_dimension()        
                
    def _load(self):
        """Download the DSD (with all references) and configure the dataset."""
        url = "http://sdw-wsrest.ecb.int/service/datastructure/ECB/%s?references=all" % self.dsd_id
        download = utils.Downloader(store_filepath=self.store_path,
                                    url=url, 
                                    filename="dsd-%s.xml" % self.dataset_code,
                                    headers=SDMX_METADATA_HEADERS,
                                    use_existing_file=self.fetcher.use_existing_file)
        filepath = download.get_filepath()
        self.fetcher.for_delete.append(filepath)
        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _get_data_by_dimension(self):
        """Yield (series, error) tuples, downloading one data file per value
        of the dimension selected by select_dimension().

        Uses HTTP conditional requests (If-Modified-Since / Last-Modified)
        to skip unchanged slices. Ends with a (None, None) sentinel.
        """
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                frequencies_supported=FREQUENCIES_SUPPORTED)
        
        dimension_keys, dimensions = get_dimensions_from_dsd(self.xml_dsd,
                                                             self.provider_name,
                                                             self.dataset_code)
        
        position, _key, dimension_values = select_dimension(dimension_keys, dimensions)
        
        count_dimensions = len(dimension_keys)
        
        for dimension_value in dimension_values:

            # SDMX key with every dimension wildcarded ('.') except the one
            # at `position`, e.g. "A............".
            sdmx_key = []
            for i in range(count_dimensions):
                if i == position:
                    sdmx_key.append(dimension_value)
                else:
                    sdmx_key.append(".")
            key = "".join(sdmx_key)

            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)
            # BUGFIX: copy the shared module-level header dict before adding
            # the conditional header; previously If-Modified-Since was
            # written into SDMX_DATA_HEADERS itself and leaked into every
            # later request that reused it.
            headers = dict(SDMX_DATA_HEADERS)
            
            last_modified = None
            if self.dataset.metadata and "Last-Modified" in self.dataset.metadata:
                headers["If-Modified-Since"] = self.dataset.metadata["Last-Modified"]
                last_modified = self.dataset.metadata["Last-Modified"]
        
            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url, 
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  headers=headers,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            if response.status_code == HTTP_ERROR_NOT_MODIFIED:
                msg = "Reject dataset updated for provider[%s] - dataset[%s] - update-date[%s]"
                logger.warning(msg % (self.provider_name, self.dataset_code, last_modified))
                continue
            
            elif response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            
            elif response.status_code >= 400:
                # BUGFIX: raise_for_status() raises HTTPError itself;
                # 'raise response.raise_for_status()' would 'raise None'.
                response.raise_for_status()
    
            # Remember the server's Last-Modified for the next run's
            # conditional request.
            if "Last-Modified" in response.headers:
                if not self.dataset.metadata:
                    self.dataset.metadata = {}
                self.dataset.metadata["Last-Modified"] = response.headers["Last-Modified"]
            
            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)
        
        yield None, None
                        
    def _set_dataset(self):
        """Copy structure metadata from the parsed DSD onto the dataset."""
        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"] 
        self.dataset.attribute_keys = dataset["attribute_keys"] 
        self.dataset.concepts = dataset["concepts"] 
        self.dataset.codelists = dataset["codelists"]
        
    def clean_field(self, bson):
        """Drop verbose title attributes after the base cleaning pass."""
        bson = super().clean_field(bson)
        bson["attributes"].pop("TITLE", None)
        bson["attributes"].pop("TITLE_COMPL", None)
        return bson

    def build_series(self, bson):
        """Register the series frequency and stamp the dataset last_update."""
        self.dataset.add_frequency(bson["frequency"])
        bson["last_update"] = self.dataset.last_update
        
        return bson
Code example #7
0
File: ecb.py  Project: srault95/dlstats
class ECB_Data(SeriesIterator):
    """Series iterator for one ECB dataset.

    Loads the data structure definition once at construction time, then
    streams the dataset's series slice by slice (one HTTP request per value
    of the selected dimension) via _get_data_by_dimension().
    """

    def __init__(self, dataset):
        """
        :param Datasets dataset: Datasets instance
        """
        super().__init__(dataset)
        self.store_path = self.get_store_path()
        self.last_modified = None

        # Name, DSD id and agency come from the dataflows fetched globally.
        self.dataset.name = self.fetcher._dataflows[self.dataset_code]["name"]
        self.dsd_id = self.fetcher._dataflows[self.dataset_code]["dsd_id"]
        self.agency_id = self.fetcher._dataflows[self.dataset_code]["attrs"].get("agencyID")

        self.xml_dsd = XMLStructure(provider_name=self.provider_name)
        # self.xml_dsd.concepts = self.fetcher._concepts

        self._load()

        # Lazy generator over (series, error) tuples.
        self.rows = self._get_data_by_dimension()

    def _load(self):
        """Download the DSD (with all references) and configure the dataset."""
        url = "http://sdw-wsrest.ecb.int/service/datastructure/%s/%s?references=all" % (self.agency_id, self.dsd_id)
        download = utils.Downloader(
            store_filepath=self.store_path,
            url=url,
            filename="dsd-%s.xml" % self.dataset_code,
            headers=SDMX_METADATA_HEADERS,
            use_existing_file=self.fetcher.use_existing_file,
        )
        filepath = download.get_filepath()
        self.fetcher.for_delete.append(filepath)
        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _get_dimensions_from_dsd(self):
        """Return (dimension_keys, dimensions) extracted from the DSD."""
        return get_dimensions_from_dsd(self.xml_dsd, self.provider_name, self.dataset_code)

    def _get_data_by_dimension(self):
        """Yield (series, error) tuples for the whole dataset.

        Splits the dataset along one dimension chosen by select_dimension(),
        downloads one SDMX data file per value of that dimension, and streams
        each parsed series as (row, err). Ends with a (None, None) sentinel.
        """
        self.xml_data = XMLData(
            provider_name=self.provider_name,
            dataset_code=self.dataset_code,
            xml_dsd=self.xml_dsd,
            dsd_id=self.dsd_id,
            frequencies_supported=FREQUENCIES_SUPPORTED,
        )

        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(dimension_keys, dimensions)

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:

            key = get_key_for_dimension(count_dimensions, position, dimension_value)

            # http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)
            if not self._is_good_url(url, good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
                print("bypass url[%s]" % url)
                continue

            headers = SDMX_DATA_HEADERS

            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(
                url=url,
                filename=filename,
                store_filepath=self.store_path,
                headers=headers,
                use_existing_file=self.fetcher.use_existing_file,
                # client=self.fetcher.requests_client
            )
            filepath, response = download.get_filepath_and_response()

            # No file on disk (e.g. empty response): skip this slice.
            if not filepath or not os.path.exists(filepath):
                continue
            self.fetcher.for_delete.append(filepath)

            if response:
                self._add_url_cache(url, response.status_code)
                # BUGFIX: these checks were dead 'elif response and ...'
                # branches behind 'if response:' and could never run.
                if response.status_code == HTTP_ERROR_NO_RESULT:
                    continue
                if response.status_code >= 400:
                    # raise_for_status() raises HTTPError itself; do not
                    # wrap it in 'raise' (that would raise None).
                    response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

        yield None, None

    def _set_dataset(self):
        """Copy structure metadata from the parsed DSD onto the dataset."""
        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"]
        self.dataset.attribute_keys = dataset["attribute_keys"]
        self.dataset.concepts = dataset["concepts"]
        self.dataset.codelists = dataset["codelists"]

    def clean_field(self, bson):
        """Drop verbose title attributes, then run the base cleaning pass."""
        bson["attributes"].pop("TITLE", None)
        bson["attributes"].pop("TITLE_COMPL", None)
        bson = super().clean_field(bson)
        return bson

    def build_series(self, bson):
        """Register the series frequency and stamp the dataset last_update."""
        self.dataset.add_frequency(bson["frequency"])
        bson["last_update"] = self.dataset.last_update

        return bson
Code example #8
0
class ECB_Data(SeriesIterator):
    """Series iterator for one ECB dataset.

    Loads the data structure definition once at construction time, then
    streams the dataset's series slice by slice (one HTTP request per value
    of the selected dimension) via _get_data_by_dimension().
    """

    def __init__(self, dataset):
        """
        :param Datasets dataset: Datasets instance
        """
        super().__init__(dataset)
        self.store_path = self.get_store_path()
        self.last_modified = None

        # Name, DSD id and agency come from the dataflows fetched globally.
        self.dataset.name = self.fetcher._dataflows[self.dataset_code]["name"]
        self.dsd_id = self.fetcher._dataflows[self.dataset_code]["dsd_id"]
        self.agency_id = self.fetcher._dataflows[
            self.dataset_code]["attrs"].get("agencyID")

        self.xml_dsd = XMLStructure(provider_name=self.provider_name)
        #self.xml_dsd.concepts = self.fetcher._concepts

        self._load()

        # Lazy generator over (series, error) tuples.
        self.rows = self._get_data_by_dimension()

    def _load(self):
        """Download the DSD (with all references) and configure the dataset."""
        url = "http://sdw-wsrest.ecb.int/service/datastructure/%s/%s?references=all" % (
            self.agency_id, self.dsd_id)
        download = utils.Downloader(
            store_filepath=self.store_path,
            url=url,
            filename="dsd-%s.xml" % self.dataset_code,
            headers=SDMX_METADATA_HEADERS,
            use_existing_file=self.fetcher.use_existing_file)
        filepath = download.get_filepath()
        self.fetcher.for_delete.append(filepath)
        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _get_dimensions_from_dsd(self):
        """Return (dimension_keys, dimensions) extracted from the DSD."""
        return get_dimensions_from_dsd(self.xml_dsd, self.provider_name,
                                       self.dataset_code)

    def _get_data_by_dimension(self):
        """Yield (series, error) tuples for the whole dataset.

        Splits the dataset along one dimension chosen by select_dimension(),
        downloads one SDMX data file per value of that dimension, and
        streams each parsed series as (row, err). Ends with a (None, None)
        sentinel.
        """
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dsd_id,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(
            dimension_keys, dimensions)

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:

            key = get_key_for_dimension(count_dimensions, position,
                                        dimension_value)

            #http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (
                self.dataset_code, key)
            if not self._is_good_url(
                    url, good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
                print("bypass url[%s]" % url)
                continue

            headers = SDMX_DATA_HEADERS

            filename = "data-%s-%s.xml" % (self.dataset_code,
                                           key.replace(".", "_"))
            download = Downloader(
                url=url,
                filename=filename,
                store_filepath=self.store_path,
                headers=headers,
                use_existing_file=self.fetcher.use_existing_file,
                #client=self.fetcher.requests_client
            )
            filepath, response = download.get_filepath_and_response()

            # No file on disk (e.g. empty response): skip this slice.
            if not filepath or not os.path.exists(filepath):
                continue
            self.fetcher.for_delete.append(filepath)

            if response:
                self._add_url_cache(url, response.status_code)
                # BUGFIX: these checks were dead 'elif response and ...'
                # branches behind 'if response:' and could never run.
                if response.status_code == HTTP_ERROR_NO_RESULT:
                    continue
                if response.status_code >= 400:
                    # raise_for_status() raises HTTPError itself; do not
                    # wrap it in 'raise' (that would raise None).
                    response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

        yield None, None

    def _set_dataset(self):
        """Copy structure metadata from the parsed DSD onto the dataset."""
        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"]
        self.dataset.attribute_keys = dataset["attribute_keys"]
        self.dataset.concepts = dataset["concepts"]
        self.dataset.codelists = dataset["codelists"]

    def clean_field(self, bson):
        """Drop verbose title attributes, then run the base cleaning pass."""
        bson["attributes"].pop("TITLE", None)
        bson["attributes"].pop("TITLE_COMPL", None)
        bson = super().clean_field(bson)
        return bson

    def build_series(self, bson):
        """Register the series frequency and stamp the dataset last_update."""
        self.dataset.add_frequency(bson["frequency"])
        bson["last_update"] = self.dataset.last_update

        return bson