Code example #1
File: oecd.py Project: gitter-badger/dlstats
    def _get_data_by_dimension(self):

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        # Select one dimension (choice="max") to slice the requests on,
        # then iterate over its values, one request per value.
        position, _key, dimension_values = select_dimension(dimension_keys,
                                                            dimensions,
                                                            choice="max")

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:

            # Partial SDMX key: the selected position carries the value,
            # every other position stays a "." wildcard.
            sdmx_key = []
            for i in range(count_dimensions):
                if i == position:
                    sdmx_key.append(dimension_value)
                else:
                    sdmx_key.append(".")
            key = "".join(sdmx_key)

            url = "%s/%s" % (self._get_url_data(), key)
            filename = "data-%s-%s.xml" % (self.dataset_code,
                                           key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            # 4xx: this slice of data is unavailable; skip it.
            if 400 <= response.status_code < 500:
                continue
            elif response.status_code >= 500:
                # raise_for_status() raises requests.HTTPError itself.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)

        # Sentinel marking the end of the stream.
        yield None, None
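
The loop above issues one request per value of the selected dimension: in the SDMX REST key, the selected position carries the value and every other position is left empty between the "." separators, which acts as a wildcard. A minimal standalone sketch of the same key construction, with made-up dimension counts and values for illustration:

def build_partial_sdmx_key(count_dimensions, position, dimension_value):
    """Fix one dimension; leave all other positions as wildcards."""
    parts = []
    for i in range(count_dimensions):
        if i == position:
            parts.append(dimension_value)
        else:
            parts.append(".")
    return "".join(parts)

# With 3 dimensions, fixing position 0 to "USA" yields "USA..":
print(build_partial_sdmx_key(3, 0, "USA"))  # -> "USA.."
print(build_partial_sdmx_key(3, 1, "GDP"))  # -> ".GDP."

# The key is appended to the data endpoint, for example:
# http://stats.oecd.org/restsdmx/sdmx.ashx/GetData/EO/USA..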
Code example #2
File: oecd.py Project: gitter-badger/dlstats
class OECD_Data(SeriesIterator):
    
    def __init__(self, dataset=None, sdmx_filter=None):
        super().__init__(dataset)
        
        # Keep the original dataset code; build_series() writes it back
        # onto each record.
        self.real_dataset_code = self.dataset_code
        
        self.sdmx_filter = sdmx_filter

        self.store_path = self.get_store_path()
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)        
        
        self._load_dsd()
        self.rows = self._get_data_by_dimension()

    def _get_url_dsd(self):
        """
        if self.dataset_code == "EO":
            self.dataset_code = "EO95_LTB"
            return "http://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/EO95_LTB"
        """
        return "http://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/%s" % self.dataset_code 

    def _get_url_data(self):
        """
        if self.dataset_code == "EO":
            self.dataset_code = "EO95_LTB"
            return "http://stats.oecd.org/restsdmx/sdmx.ashx/GetData/EO95_LTB"
            
        db.datasets.bulkWrite([{ deleteMany:  { "filter" : {provider_name: "OECD", dataset_code: "EO"} } }], { ordered : false })
        db.series.bulkWrite([{ deleteMany:  { "filter" : {provider_name: "OECD", dataset_code: "EO"} } }], { ordered : false })
        """
        return "http://stats.oecd.org/restsdmx/sdmx.ashx/GetData/%s" % self.dataset_code 
        
    def _load_dsd(self):
        url = self._get_url_dsd()
        download = Downloader(store_filepath=self.store_path,
                              url=url, 
                              filename="dsd-%s.xml" % self.dataset_code,
                              use_existing_file=self.fetcher.use_existing_file,
                              client=self.fetcher.requests_client)
        filepath = download.get_filepath()
        self.fetcher.for_delete.append(filepath)
        
        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _set_dataset(self):

        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"] 
        self.dataset.attribute_keys = dataset["attribute_keys"] 
        self.dataset.concepts = dataset["concepts"] 
        self.dataset.codelists = dataset["codelists"]

    def _get_dimensions_from_dsd(self):
        return get_dimensions_from_dsd(self.xml_dsd, self.provider_name, self.dataset_code)

    def _get_data_by_dimension(self):

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)
        
        dimension_keys, dimensions = self._get_dimensions_from_dsd()
        
        position, _key, dimension_values = select_dimension(dimension_keys, dimensions, choice="max")
        
        count_dimensions = len(dimension_keys)
        
        for dimension_value in dimension_values:
            
            sdmx_key = []
            for i in range(count_dimensions):
                if i == position:
                    sdmx_key.append(dimension_value)
                else:
                    sdmx_key.append(".")
            key = "".join(sdmx_key)

            url = "%s/%s" % (self._get_url_data(), key)
            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url, 
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  client=self.fetcher.requests_client
                                  )
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)
            
            # 4xx: this slice of data is unavailable; skip it.
            if 400 <= response.status_code < 500:
                continue
            elif response.status_code >= 500:
                # raise_for_status() raises requests.HTTPError itself.
                response.raise_for_status()
            
            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)
        
        yield None, None
        
    def build_series(self, bson):
        bson["last_update"] = self.dataset.last_update
        bson["dataset_code"] = self.real_dataset_code
        self.dataset.add_frequency(bson["frequency"])
        return bson
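
For context, a hedged sketch of how an iterator like this might be consumed; the driver loop and the `some_dataset` placeholder below are illustrative assumptions, not the actual dlstats fetcher API:

# Hypothetical driver loop (the real dlstats framework differs):
data = OECD_Data(dataset=some_dataset)  # 'some_dataset' is a placeholder
for row, err in data.rows:
    if row is None and err is None:
        break        # (None, None) sentinel: the stream is exhausted
    if err:
        continue     # skip rows that failed to parse
    bson = data.build_series(row)
    # ... persist 'bson' (e.g. in MongoDB, as the bulkWrite notes suggest)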