def test_process_series_data(self):
        """End-to-end check: a dataset backed by FakeDatas writes its series
        to MongoDB, and tags.update_tags() then attaches a non-empty tag
        list to every stored series document."""

        # nosetests -s -v dlstats.tests.fetchers.test__commons:DBSeriesTestCase.test_process_series_data

        # Pre-condition: all collections start empty.
        self._collections_is_empty()
    
        provider_name = "p1"
        dataset_code = "d1"
        dataset_name = "d1 name"
    
        f = Fetcher(provider_name=provider_name, 
                    db=self.db)

        # Minimal dataset fixture with two dimension entries.
        d = Datasets(provider_name=provider_name, 
                    dataset_code=dataset_code,
                    name=dataset_name,
                    last_update=datetime.now(),
                    doc_href="http://www.example.com",
                    fetcher=f, 
                    is_load_previous_version=False)
        d.dimension_list.update_entry("Scale", "Billions", "Billions")
        d.dimension_list.update_entry("Country", "AFG", "AFG")
        
        # bulk_size=1 forces a write per record instead of batching.
        s = Series(provider_name=f.provider_name, 
                   dataset_code=dataset_code, 
                   last_update=datetime(2013,10,28), 
                   bulk_size=1, 
                   fetcher=f)
        
        # FakeDatas generates a known number of records (datas.max_record).
        datas = FakeDatas(provider_name=provider_name, 
                          dataset_code=dataset_code,
                          fetcher=f)
        s.data_iterator = datas
        
        d.series = s
        d.update_database()        
        
        '''Count All series'''
        self.assertEqual(self.db[constants.COL_SERIES].count(), datas.max_record)

        '''Count series for this provider and dataset'''
        series = self.db[constants.COL_SERIES].find({'provider_name': f.provider_name, 
                                                     "dataset_code": dataset_code})
        self.assertEqual(series.count(), datas.max_record)

        # Compute and store search tags for the series collection.
        tags.update_tags(self.db, 
                    provider_name=f.provider_name, dataset_code=dataset_code,  
                    col_name=constants.COL_SERIES)        

        '''Count series for this provider and dataset and in keys[]'''
        series = self.db[constants.COL_SERIES].find({'provider_name': f.provider_name, 
                                                     "dataset_code": dataset_code,
                                                     "key": {"$in": datas.keys}})
        
        self.assertEqual(series.count(), datas.max_record)
        

        # Every stored series document must now carry a non-empty tag list.
        for doc in series:
            self.assertTrue("tags" in doc)
            self.assertTrue(len(doc['tags']) > 0)
Beispiel #2
0
    def upsert_dataset(self, dataset_code):
        """Update the database with data for one selected dataset.

        Raises errors.RejectUpdatedDataset when the stored copy is already
        at least as recent as the source; otherwise stores the dataset and
        returns the result of ``dataset.update_database()``.
        """
        self.get_selected_datasets()

        query = {'provider_name': self.provider_name,
                 'dataset_code': dataset_code}
        projection = {'dataset_code': 1, 'last_update': 1}
        doc = self.db[constants.COL_DATASETS].find_one(query, projection)

        settings = self.selected_datasets[dataset_code]

        # Nothing newer upstream: reject the refresh.
        if doc and doc['last_update'] >= settings['last_update']:
            raise errors.RejectUpdatedDataset(
                provider_name=self.provider_name,
                dataset_code=dataset_code,
                comments="update-date[%s]" % doc['last_update'])

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings["name"],
                           doc_href=settings["metadata"].get("doc_href"),
                           last_update=settings["last_update"],
                           fetcher=self)
        dataset.series.data_iterator = EurostatData(dataset)

        return dataset.update_database()
    def test_constructor(self):
        """Datasets construction: rejects a missing fetcher, exposes typed
        internals, and projects its fields into a BSON dict."""

        # nosetests -s -v dlstats.tests.fetchers.test__commons:DatasetTestCase.test_constructor

        # A Datasets without a fetcher must be rejected.
        with self.assertRaises(ValueError):
            Datasets(is_load_previous_version=False)

        fetcher = Fetcher(provider_name="p1", is_indexes=False)

        dataset = Datasets(provider_name="p1",
                           dataset_code="d1",
                           name="d1 Name",
                           doc_href="http://www.example.com",
                           fetcher=fetcher,
                           is_load_previous_version=False)
        dataset.dimension_list.update_entry("country", "country", "country")

        # Internal collections expose the expected types.
        self.assertTrue(isinstance(dataset.series, Series))
        self.assertTrue(isinstance(dataset.dimension_list, CodeDict))
        self.assertTrue(isinstance(dataset.attribute_list, CodeDict))

        # The BSON projection mirrors the constructor arguments.
        bson = dataset.bson
        self.assertEqual(bson['provider_name'], "p1")
        self.assertEqual(bson["dataset_code"], "d1")
        self.assertEqual(bson["name"], "d1 Name")
        self.assertEqual(bson["doc_href"], "http://www.example.com")
        self.assertTrue(isinstance(bson["dimension_list"], dict))
        self.assertTrue(isinstance(bson["attribute_list"], dict))
        self.assertIsNone(bson["last_update"])
        self.assertEqual(bson["slug"], "p1-d1")

        #TODO: last_update
        dataset.last_update = datetime.now()
    def test_unique_constraint(self):
        """Inserting a second document with the same provider/dataset pair
        must violate the unique index on the datasets collection."""

        # nosetests -s -v dlstats.tests.fetchers.test__commons:DBDatasetTestCase.test_unique_constraint

        self._collections_is_empty()

        fetcher = Fetcher(provider_name="p1",
                          db=self.db)

        dataset = Datasets(provider_name="p1",
                           dataset_code="d1",
                           name="d1 Name",
                           last_update=datetime.now(),
                           doc_href="http://www.example.com",
                           fetcher=fetcher,
                           is_load_previous_version=False)
        dataset.dimension_list.update_entry("Country", "AFG", "AFG")
        dataset.dimension_list.update_entry("Scale", "Billions", "Billions")

        fake_datas = FakeDatas(provider_name="p1",
                               dataset_code="d1",
                               fetcher=fetcher)
        dataset.series.data_iterator = fake_datas

        outcome = dataset.update_database()
        self.assertIsNotNone(outcome)

        self.assertEqual(self.db[constants.COL_DATASETS].count(), 1)

        # A duplicate provider/dataset pair must raise DuplicateKeyError.
        with self.assertRaises(DuplicateKeyError):
            duplicate = dict(provider_name="p1", dataset_code="d1")
            self.db[constants.COL_DATASETS].insert(duplicate)
Beispiel #5
0
    def upsert_dataset(self, dataset_code):
        """Load one FED dataset from its registered URL and store it,
        logging the elapsed time. Returns the update_database() result."""
        started = time.time()
        logger.info("upsert dataset[%s] - START" % (dataset_code))

        #TODO: check whether the dataset already exists and update instead

        entry = DATASETS[dataset_code]
        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=entry['name'],
                           doc_href=entry['doc_href'],
                           last_update=datetime.now(),
                           fetcher=self)

        dataset.series.data_iterator = FED_Data(dataset=dataset,
                                                url=entry['url'])
        result = dataset.update_database()

        elapsed = time.time() - started
        logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, elapsed))

        return result
Beispiel #6
0
    def upsert_dataset(self, dataset_code):
        """Updates data in Database for selected datasets.

        Rejects the refresh when the stored dataset is already up to date,
        otherwise rebuilds it from Eurostat and returns the update result.
        """
        self.get_selected_datasets()

        selector = {'provider_name': self.provider_name,
                    'dataset_code': dataset_code}
        fields = {'dataset_code': 1, 'last_update': 1}
        doc = self.db[constants.COL_DATASETS].find_one(selector, fields)

        settings = self.selected_datasets[dataset_code]

        # Skip when the stored copy is at least as recent as the source.
        if doc and doc['last_update'] >= settings['last_update']:
            raise errors.RejectUpdatedDataset(
                provider_name=self.provider_name,
                dataset_code=dataset_code,
                comments="update-date[%s]" % doc['last_update'])

        # last_update is assigned after construction, as in the original.
        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings["name"],
                           doc_href=settings["metadata"].get("doc_href"),
                           last_update=None,
                           fetcher=self)
        dataset.last_update = settings["last_update"]

        dataset.series.data_iterator = EurostatData(dataset)

        return dataset.update_database()
Beispiel #7
0
    def upsert_dataset(self, dataset_code):
        """Build one ECB dataset and update the database.

        Logs start/end with elapsed time and returns the result of
        ``dataset.update_database()``.
        """
        start = time.time()
        logger.info("upsert dataset[%s] - START" % (dataset_code))

        #TODO: check whether the dataset already exists and update instead

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=None,
                           doc_href=self.provider.website,
                           last_update=datetime.now(),
                           fetcher=self)

        _data = ECB_Data(dataset=dataset)
        dataset.series.data_iterator = _data
        # Fix: the original wrapped this call in "try: ... except: raise",
        # a bare except that only re-raises — a no-op that also masks intent.
        # Exceptions propagate unchanged without it.
        result = dataset.update_database()

        # Drop the local reference to the (potentially large) iterator.
        _data = None

        end = time.time() - start
        logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))

        return result
Beispiel #8
0
    def upsert_dataset(self, dataset_code):
        """Load one DESTATIS dataset and persist it, logging elapsed time."""
        started = time.time()
        logger.info("upsert dataset[%s] - START" % (dataset_code))

        #TODO: check whether the dataset already exists and update instead

        entry = DATASETS[dataset_code]
        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=entry['name'],
                           doc_href=entry['doc_href'],
                           last_update=datetime.now(),
                           fetcher=self)

        dataset.series.data_iterator = DESTATIS_Data(dataset=dataset,
                                                     ns_tag_data=entry["ns_tag_data"])
        result = dataset.update_database()

        elapsed = time.time() - started
        logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, elapsed))

        return result
Beispiel #9
0
    def upsert_dataset(self, dataset_code):
        """Load one BEA dataset from its Excel sheet and store it.

        Rejects the refresh when no newer release is available and
        force_update is off.
        """
        settings = self._get_datasets_settings()[dataset_code]
        meta = settings["metadata"]

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings["name"],
                           doc_href='http://www.bea.gov',
                           fetcher=self)

        sheet = self._get_sheet(meta["url"], meta["filename"], meta["sheet_name"])
        fetcher_data = BeaData(dataset, url=meta["url"], sheet=sheet)

        already_current = (dataset.last_update
                           and fetcher_data.release_date >= dataset.last_update
                           and not self.force_update)
        if already_current:
            raise errors.RejectUpdatedDataset(
                provider_name=self.provider_name,
                dataset_code=dataset_code,
                comments="update-date[%s]" % fetcher_data.release_date)

        dataset.last_update = fetcher_data.release_date
        dataset.series.data_iterator = fetcher_data

        return dataset.update_database()
Beispiel #10
0
    def upsert_dataset(self, dataset_code, datas=None):
        """Load one OECD dataset, store it, then refresh its metadata.

        Parameters
        ----------
        dataset_code : str
            Key into the DATASETS registry.
        datas : unused
            Kept for backward compatibility with existing callers.

        Raises
        ------
        Exception
            If *dataset_code* is not declared in DATASETS.
        """
        start = time.time()

        logger.info("upsert dataset[%s] - START" % (dataset_code))

        if not DATASETS.get(dataset_code):
            # Bug fix: the original concatenated the code straight onto the
            # message ("This dataset is unknownXYZ"); add a separator.
            raise Exception("This dataset is unknown: %s" % dataset_code)

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=DATASETS[dataset_code]['name'],
                           doc_href=DATASETS[dataset_code]['doc_href'],
                           fetcher=self)

        fetcher_data = OECD_Data(dataset)
        dataset.series.data_iterator = fetcher_data
        dataset.update_database()

        end = time.time() - start
        logger.info("upsert dataset[%s] - END-BEFORE-METAS - time[%.3f seconds]" % (dataset_code, end))

        # Second phase: rebuild the dataset's metadata after the series load.
        self.update_metas(dataset_code)

        end = time.time() - start
        logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))
Beispiel #11
0
    def upsert_dataset(self, dataset_code):
        """Download one BEA Excel sheet and store the dataset it describes."""
        settings = self._get_datasets_settings()[dataset_code]

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings["name"],
                           doc_href='http://www.bea.gov',
                           fetcher=self)

        metadata = settings["metadata"]
        sheet = self._get_sheet(metadata["url"],
                                metadata["filename"],
                                metadata["sheet_name"])
        fetcher_data = BeaData(dataset, url=metadata["url"], sheet=sheet)

        # Reject the refresh when nothing newer has been released
        # (unless a forced update was requested).
        if (dataset.last_update
                and fetcher_data.release_date >= dataset.last_update
                and not self.force_update):
            comments = "update-date[%s]" % fetcher_data.release_date
            raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                              dataset_code=dataset_code,
                                              comments=comments)

        dataset.last_update = fetcher_data.release_date
        dataset.series.data_iterator = fetcher_data

        return dataset.update_database()
Beispiel #12
0
    def test_not_recordable_dataset(self):
        """A dataset whose series iterator yields nothing must not be
        recorded: update_database() returns None and no document is stored."""

        # nosetests -s -v dlstats.tests.fetchers.test__commons:DBDatasetTestCase.test_not_recordable_dataset

        self._collections_is_empty()

        f = Fetcher(provider_name="p1", 
                    db=self.db)

        d = Datasets(provider_name="p1", 
                    dataset_code="d1",
                    name="d1 Name",
                    last_update=datetime.now(),
                    doc_href="http://www.example.com",
                    fetcher=f, 
                    is_load_previous_version=False)
        d.dimension_list.update_entry("Scale", "Billions", "Billions")
        d.dimension_list.update_entry("country", "AFG", "AFG")

        class EmptySeriesIterator():
            # Fix: a proper iterator must define __iter__ as well as
            # __next__; without it, iter() on an instance raises TypeError.
            def __iter__(self):
                return self

            def __next__(self):
                raise StopIteration

        d.series.data_iterator = EmptySeriesIterator()

        # Renamed from "id" — the original shadowed the builtin.
        result = d.update_database()
        self.assertIsNone(result)

        self.assertEqual(self.db[constants.COL_DATASETS].count(), 0)
Beispiel #13
0
 def upsert_dataset(self, dataset_code):
     """Build one BDF dataset from its agenda settings and store it."""
     self.get_selected_datasets()
     self.dataset_settings = self.selected_datasets[dataset_code]

     dataset = Datasets(provider_name=self.provider_name,
                        dataset_code=dataset_code,
                        name=self.dataset_settings["name"],
                        last_update=self.dataset_settings['last_update'],
                        fetcher=self)

     source_url = self.dataset_settings['metadata']['url']
     dataset.series.data_iterator = BDF_Data(dataset, source_url)
     return dataset.update_database()
Beispiel #14
0
    def upsert_dataset(self, dataset_code):
        """Fetch one BDF dataset and write it to the database."""
        self.get_selected_datasets()
        settings = self.selected_datasets[dataset_code]
        # Keep the side effect on self used elsewhere in the fetcher.
        self.dataset_settings = settings

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings["name"],
                           last_update=settings['last_update'],
                           fetcher=self)

        dataset.series.data_iterator = BDF_Data(dataset,
                                                settings['metadata']['url'])
        return dataset.update_database()
Beispiel #15
0
 def upsert_gem(self, dataset_code):
     """Load a World Bank GEM dataset and persist it."""
     entry = DATASETS[dataset_code]
     dataset = Datasets(provider_name=self.provider_name,
                        dataset_code=dataset_code,
                        name=entry['name'],
                        doc_href=entry['doc_href'],
                        fetcher=self)
     gem_data = GemData(dataset, entry['url'])
     # Stamp the dataset with the source's release date before loading.
     dataset.last_update = gem_data.release_date
     dataset.series.data_iterator = gem_data
     dataset.update_database()
Beispiel #16
0
    def upsert_dataset(self, dataset_code):
        """Store a dummy dataset with a fixed name and a clean timestamp."""
        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name="My Dataset Name",
                           last_update=clean_datetime(),
                           fetcher=self)
        dataset.series.data_iterator = DUMMY_Data(dataset)
        return dataset.update_database()
Beispiel #17
0
 def load_datasets_update(self):
     """Run an update for every agenda entry present in datasets_filter."""
     for entry in self._parse_agenda():
         if entry['dataflow_key'] not in self.datasets_filter:
             continue
         dataset = Datasets(provider_name=self.provider_name,
                            dataset_code=entry['dataflow_key'],
                            name=entry['name'],
                            last_update=entry['last_update'],
                            fetcher=self)
         dataset.series.data_iterator = BDF_Data(dataset, entry['url'])
         dataset.update_database()
         msg = "get update - provider[%s] - dataset[%s] - last-update-dataset[%s]"
         logger.info(msg % (self.provider_name,
                            entry['dataflow_key'],
                            entry['last_update']))
Beispiel #18
0
    def upsert_dataset(self, dataset_code):
        """Load the ECB structure, then build and store one dataset."""
        self._load_structure()

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=None,
                           doc_href=self.provider.website,
                           fetcher=self)
        # last_update is stamped after construction, as in the original.
        dataset.last_update = utils.clean_datetime()

        dataset.series.data_iterator = ECB_Data(dataset=dataset)
        return dataset.update_database()
Beispiel #19
0
    def upsert_dataset(self, dataset_code):
        """Refresh one ECB dataset in the database."""
        self._load_structure()

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=None,
                           doc_href=self.provider.website,
                           last_update=utils.clean_datetime(),
                           fetcher=self)

        ecb_source = ECB_Data(dataset=dataset)
        dataset.series.data_iterator = ecb_source
        return dataset.update_database()
Beispiel #20
0
    def _common_tests(self):
        """Shared scenario: mock the BIS download URL with a local fixture,
        run the provider/dataset update pipeline, then check the stored
        dataset and series documents."""

        # Pre-condition: all collections start empty.
        self._collections_is_empty()
        
        url = DATASETS[self.dataset_code]['url']

        # The fixture file must exist locally before it can be served.
        self.filepath = get_filepath(self.dataset_code)
        self.assertTrue(os.path.exists(self.filepath))
        
        # Serve the local fixture in place of the real BIS endpoint.
        httpretty.register_uri(httpretty.GET, 
                               url,
                               body=mock_streaming(self.filepath),
                               status=200,
                               content_type='application/octet-stream;charset=UTF-8',
                               streaming=True)
        
        # provider.update_database
        self.fetcher.provider.update_database()
        provider = self.db[constants.COL_PROVIDERS].find_one({"name": self.fetcher.provider_name})
        self.assertIsNotNone(provider)
        
        # upsert_data_tree
        self.fetcher.upsert_data_tree()
        provider = self.db[constants.COL_PROVIDERS].find_one({"name": self.fetcher.provider_name}) 
        self.assertIsNotNone(provider['data_tree'])
        
        dataset = Datasets(provider_name=self.fetcher.provider_name, 
                           dataset_code=self.dataset_code, 
                           name=DATASETS[self.dataset_code]['name'], 
                           doc_href=DATASETS[self.dataset_code]['doc_href'], 
                           fetcher=self.fetcher)

        # The downloaded file is stored next to the fixture.
        fetcher_data = bis.BIS_Data(dataset,
                                    url=url, 
                                    filename=DATASETS[self.dataset_code]['filename'],
                                    store_filepath=os.path.dirname(self.filepath))
        
        dataset.series.data_iterator = fetcher_data
        dataset.update_database()

        # The dataset document must exist and expose the expected number
        # of dimensions for this dataset_code.
        self.dataset = self.db[constants.COL_DATASETS].find_one({'provider_name': self.fetcher.provider_name, 
                                                            "dataset_code": self.dataset_code})
        
        self.assertIsNotNone(self.dataset)
        
        self.assertEqual(len(self.dataset["dimension_list"]), DATASETS[self.dataset_code]["dimensions_count"])
        
        # All series rows for this provider/dataset must have been written.
        series = self.db[constants.COL_SERIES].find({'provider_name': self.fetcher.provider_name, 
                                                     "dataset_code": self.dataset_code})
        self.assertEqual(series.count(), SERIES_COUNT)
Beispiel #21
0
    def fixtures(self):
        """Populate the test database with one dataset and a batch of fake
        series (random dimension values), then compute search tags for both
        the datasets and the series collections."""

        fetcher = Fetcher(provider_name="p1", 
                               db=self.db)

        max_record = 10
        
        d = Datasets(provider_name="eurostat", 
                    dataset_code="name_a",
                    name="Eurostat name_a",
                    last_update=datetime.now(),
                    doc_href="http://www.example.com",
                    fetcher=fetcher, 
                    is_load_previous_version=False)
        
        # Dimension code lists the generated series draw from.
        d.dimension_list.update_entry("Country", "FRA", "France")
        d.dimension_list.update_entry("Country", "AUS", "Australie")
        d.dimension_list.update_entry("Scale", "Billions", "Billions Dollars")
        d.dimension_list.update_entry("Scale", "Millions", "Millions Dollars")
        d.dimension_list.update_entry("Currency", "E", "Euro")
        d.dimension_list.update_entry("Currency", "D", "Dollars")
        d.dimension_list.update_entry("Sector", "agr", "Agriculture")
        d.dimension_list.update_entry("Sector", "ind", "Industrie")

        def dimensions_generator():
            # Random draw per series; values repeated in the list bias the
            # choice toward 'FRA' / 'agr' / 'E'.
            return {
                'Country': choice(['FRA', 'AUS', 'FRA']),
                'Sector': choice(['agr', 'ind', 'agr']),
                'Currency': choice(['E', 'D', 'E']) 
            }
        
        # FakeDatas emits max_record series using the generator above.
        datas = FakeDatas(provider_name=d.provider_name, 
                          dataset_code=d.dataset_code,
                          max_record=max_record,
                          dimensions_generator=dimensions_generator)
        d.series.data_iterator = datas
        _id = d.update_database()

        # Tag the dataset document...
        utils.update_tags(self.db, 
                    provider_name=d.provider_name, 
                    dataset_code=d.dataset_code, 
                    col_name=constants.COL_DATASETS, 
                    max_bulk=20)

        # ...and every series document.
        utils.update_tags(self.db, 
                    provider_name=d.provider_name, 
                    dataset_code=d.dataset_code, 
                    col_name=constants.COL_SERIES, 
                    max_bulk=20)
Beispiel #22
0
    def upsert_dataset(self, dataset_code):
        """Store a dummy dataset carrying a small codelists mapping."""
        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name="My Dataset Name",
                           last_update=clean_datetime(),
                           fetcher=self)
        dataset.codelists = {
            'COUNTRY': {'FRA': 'France'},
            'OBS_STATUS': {'A': "A"}
        }
        dataset.series.data_iterator = DUMMY_Data(dataset)
        return dataset.update_database()
Beispiel #23
0
 def load_datasets_update(self):
     """Update every dataset listed both in the agenda and in the filter."""
     template = "get update - provider[%s] - dataset[%s] - last-update-dataset[%s]"
     for item in self._parse_agenda():
         key = item['dataflow_key']
         if key not in self.datasets_filter:
             continue
         dataset = Datasets(provider_name=self.provider_name,
                            dataset_code=key,
                            name=item['name'],
                            last_update=item['last_update'],
                            fetcher=self)
         dataset.series.data_iterator = BDF_Data(dataset, item['url'])
         dataset.update_database()
         logger.info(template % (self.provider_name, key, item['last_update']))
Beispiel #24
0
    def upsert_dataset(self, dataset_code):
        """Load INSEE structures, then build and store one dataset."""
        self._load_structure_dataflows()
        self._load_structure_concepts()

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=None,
                           doc_href=None,
                           fetcher=self)
        dataset.last_update = clean_datetime()

        dataset.series.data_iterator = INSEE_Data(dataset)

        return dataset.update_database()
Beispiel #25
0
    def upsert_dataset(self, dataset_code):
        """Build one World Bank API dataset and store it."""
        self.get_selected_datasets()

        settings = self.selected_datasets[dataset_code]

        # Example indicator page: http://data.worldbank.org/indicator/AG.AGR.TRAC.NO
        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings["name"],
                           last_update=clean_datetime(),
                           fetcher=self)

        dataset.series.data_iterator = WorldBankAPIData(dataset, settings)

        return dataset.update_database()
Beispiel #26
0
    def upsert_dataset(self, dataset_code):
        """Refresh a single INSEE dataset in the database."""
        self._load_structure_dataflows()
        self._load_structure_concepts()

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=None,
                           doc_href=None,
                           fetcher=self)
        dataset.last_update = clean_datetime()

        insee_source = INSEE_Data(dataset)
        dataset.series.data_iterator = insee_source
        return dataset.update_database()
Beispiel #27
0
    def upsert_dataset(self, dataset_code):
        """Build one OECD dataset from the DATASETS registry and store it.

        Raises:
            Exception: if *dataset_code* is not declared in DATASETS.
        """
        if not DATASETS.get(dataset_code):
            # Bug fix: the original glued the code onto the message
            # ("This dataset is unknownXYZ"); add a separator.
            raise Exception("This dataset is unknown: %s" % dataset_code)

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=DATASETS[dataset_code]['name'],
                           doc_href=DATASETS[dataset_code]['doc_href'],
                           fetcher=self)
        dataset.last_update = clean_datetime()

        dataset.series.data_iterator = OECD_Data(
            dataset, sdmx_filter=DATASETS[dataset_code]['sdmx_filter'])

        return dataset.update_database()
Beispiel #28
0
 def upsert_dataset(self, dataset_code):
     """Build one OECD dataset from the DATASETS registry and store it.

     Raises:
         Exception: if *dataset_code* is not declared in DATASETS.
     """
     if not DATASETS.get(dataset_code):
         # Bug fix: add a separator between the message and the code
         # (the original produced "This dataset is unknownXYZ").
         raise Exception("This dataset is unknown: %s" % dataset_code)

     dataset = Datasets(provider_name=self.provider_name,
                        dataset_code=dataset_code,
                        name=DATASETS[dataset_code]['name'],
                        doc_href=DATASETS[dataset_code]['doc_href'],
                        last_update=clean_datetime(),
                        fetcher=self)

     dataset.series.data_iterator = OECD_Data(dataset,
                                              sdmx_filter=DATASETS[dataset_code]['sdmx_filter'])

     return dataset.update_database()
Beispiel #29
0
    def upsert_dataset(self, dataset_code):
        """Build one INSEE dataset and update the database, logging elapsed
        time. Returns the result of dataset.update_database()."""

        #self.load_structure(force=False)
        
        start = time.time()
        logger.info("upsert dataset[%s] - START" % (dataset_code))
        
        #if not dataset_code in self._dataflows:
        #    raise Exception("This dataset is unknown: %s" % dataset_code)
        
        #dataflow = self._dataflows[dataset_code]
        
        #cat = self.db[constants.COL_CATEGORIES].find_one({'category_code': dataset_code})
        #dataset.name = cat['name']
        #dataset.doc_href = cat['doc_href']
        #dataset.last_update = cat['last_update']

        dataset = Datasets(provider_name=self.provider_name, 
                           dataset_code=dataset_code,
                           #name=dataflow.name.en,
                           doc_href=None,
                           last_update=datetime.now(), #TODO:
                           fetcher=self)
        
        # Fetch any previously stored dataset document and hand it to
        # INSEE_Data. NOTE(review): presumably used there to detect
        # changes against the stored version — confirm in INSEE_Data.
        dataset_doc = self.db[constants.COL_DATASETS].find_one({'provider_name': self.provider_name,
                                                                "dataset_code": dataset_code})
        
        insee_data = INSEE_Data(dataset=dataset,
                                dataset_doc=dataset_doc, 
                                #dataflow=dataflow, 
                                #sdmx=self.sdmx
                                )
        dataset.series.data_iterator = insee_data
        result = dataset.update_database()
        
        end = time.time() - start
        logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))
        
        """
        > IDBANK:  A définir dynamiquement sur site ?
        doc_href d'une serie: http://www.bdm.insee.fr/bdm2/affichageSeries?idbank=001694226
        > CODE GROUPE: Balance des Paiements mensuelle - Compte de capital
        http://www.bdm.insee.fr/bdm2/choixCriteres?codeGroupe=1556
        """
        return result
Beispiel #30
0
    def upsert_dataset(self, dataset_code):
        """Store one dataset: from its Excel workbook when registered in
        DATASETS, otherwise from the World Bank API."""
        self.get_selected_datasets()

        settings = self.selected_datasets[dataset_code]

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings["name"],
                           fetcher=self)

        if dataset_code in DATASETS:
            # Registered datasets come from a known Excel workbook.
            registry_entry = DATASETS[dataset_code]
            dataset.series.data_iterator = ExcelData(dataset, registry_entry["url"])
            dataset.doc_href = registry_entry["doc_href"]
        else:
            dataset.last_update = clean_datetime()
            dataset.series.data_iterator = WorldBankAPIData(dataset, settings)

        return dataset.update_database()
Beispiel #31
0
    def test_update_database(self):
        """update_database() must return the dataset ObjectId, persist a
        document mirroring the constructor arguments, and write one series
        row per record generated by FakeDatas."""

        # nosetests -s -v dlstats.tests.fetchers.test__commons:DBDatasetTestCase.test_update_database

        # Pre-condition: all collections start empty.
        self._collections_is_empty()

        f = Fetcher(provider_name="p1", 
                    db=self.db)

        d = Datasets(provider_name="p1", 
                    dataset_code="d1",
                    name="d1 Name",
                    last_update=datetime.now(),
                    doc_href="http://www.example.com",
                    fetcher=f, 
                    is_load_previous_version=False)
        d.dimension_list.update_entry("Scale", "Billions", "Billions")
        d.dimension_list.update_entry("country", "AFG", "AFG")

        # FakeDatas produces datas.max_record series records.
        datas = FakeDatas(provider_name="p1", 
                          dataset_code="d1",
                          fetcher=f)
        d.series.data_iterator = datas

        id = d.update_database()
        self.assertIsNotNone(id)
        self.assertIsInstance(id, ObjectId)
        self.db[constants.COL_DATASETS].find_one({'_id': ObjectId(id)})
        
        #print(result.raw)

        # The stored document mirrors the constructor arguments.
        bson = self.db[constants.COL_DATASETS].find_one({'provider_name': "p1", "dataset_code": "d1"})
        self.assertIsNotNone(bson)
    
        self.assertEqual(bson['provider_name'], "p1")
        self.assertEqual(bson["dataset_code"], "d1")
        self.assertEqual(bson["name"], "d1 Name")
        self.assertEqual(bson["doc_href"], "http://www.example.com")
        self.assertTrue(isinstance(bson["dimension_list"], dict))
        self.assertTrue(isinstance(bson["attribute_list"], dict))

        # One series document per generated record.
        series = self.db[constants.COL_SERIES].find({'provider_name': f.provider_name, 
                                                     "dataset_code": d.dataset_code})
        self.assertEqual(series.count(), datas.max_record)
Beispiel #32
0
    def test_update_tag(self):
        """Upsert a dataset with fake series, run the tagger over both the
        dataset and series collections, and verify the generated tags."""
        # nosetests -s -v dlstats.tests.test_search:DBTagsTestCase.test_update_tag

        max_record = 10

        d = Datasets(provider_name="eurostat", 
                    dataset_code="name_a",
                    name="Eurostat name_a",
                    last_update=datetime.now(),
                    doc_href="http://www.example.com",
                    fetcher=self.fetcher, 
                    is_load_previous_version=False)

        d.dimension_list.update_entry("Country", "FRA", "France")
        d.dimension_list.update_entry("Scale", "Billions", "Billions Dollars")

        fake_source = FakeDatas(provider_name=d.provider_name,
                                dataset_code=d.dataset_code,
                                max_record=max_record)
        d.series.data_iterator = fake_source
        _id = d.update_database()

        # Tag the dataset document first, then every series document.
        for col_name in (constants.COL_DATASETS, constants.COL_SERIES):
            utils.update_tags(self.db,
                              provider_name=d.provider_name,
                              dataset_code=d.dataset_code,
                              col_name=col_name,
                              max_bulk=20)

        doc = self.db[constants.COL_DATASETS].find_one({"_id": _id})
        self.assertListEqual(doc['tags'],
                             sorted(['eurostat', 'name_a', 'billions',
                                     'dollars', 'france']))

        query = {'provider_name': d.provider_name,
                 "dataset_code": d.dataset_code}
        series = self.db[constants.COL_SERIES].find(query)
        self.assertEqual(series.count(), max_record)

        # Every series must have received at least one tag.
        for s in series:
            self.assertTrue(len(s['tags']) > 0)
Beispiel #33
0
    def upsert_dataset(self, dataset_code):
        """Build the dataset for *dataset_code*, pick the matching
        data-iterator class and write the dataset to the database.

        :param dataset_code: code of the dataset to upsert
        :returns: result of ``Datasets.update_database()``
        """
        settings = DATASETS[dataset_code]

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings['name'],
                           doc_href=settings['doc_href'],
                           fetcher=self)

        # Use the dedicated iterator class when one is registered,
        # otherwise fall back to the generic XML iterator.
        klass_key = dataset_code if dataset_code in DATASETS_KLASS else "XML"
        dataset.series.data_iterator = DATASETS_KLASS[klass_key](dataset)

        return dataset.update_database()
Beispiel #34
0
    def upsert_dataset(self, dataset_code):
        """Upsert a minimal demo dataset fed by the DUMMY_Data iterator.

        :param dataset_code: code of the dataset to upsert
        :returns: result of ``Datasets.update_database()``
        """
        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name="My Dataset Name",
                           last_update=clean_datetime(),
                           fetcher=self)

        # Static code lists for the two dimensions the dummy data uses.
        dataset.codelists = {
            'COUNTRY': {'FRA': 'France'},
            'OBS_STATUS': {'A': "A"},
        }
        dataset.series.data_iterator = DUMMY_Data(dataset)

        return dataset.update_database()
Beispiel #35
0
 def upsert_weo_issue(self, url, dataset_code):
     """Download one WEO issue from *url* and upsert it as *dataset_code*.

     Database errors are logged and swallowed; the method returns None
     either way.
     """
     settings = DATASETS[dataset_code]
     
     dataset = Datasets(provider_name=self.provider_name, 
                        dataset_code=dataset_code, 
                        name=settings['name'], 
                        doc_href=settings['doc_href'], 
                        fetcher=self)
     
     weo_data = WeoData(dataset, url)
     # The WEO issue's release date drives the dataset's last_update.
     dataset.last_update = weo_data.release_date        
     dataset.attribute_list.update_entry('flags','e','Estimated')
     dataset.series.data_iterator = weo_data
     try:
         dataset.update_database()
         self.update_metas(dataset_code)
     except Exception as err:
         # NOTE(review): broad catch — failures are only logged, never
         # propagated; confirm this best-effort behaviour is intended.
         logger.error(str(err))
Beispiel #36
0
    def upsert_dataset(self, dataset_code):
        """Refresh the database contents for one selected ESRI dataset.

        :param dataset_code: code of the dataset to update
        :returns: result of ``Datasets.update_database()``
        """
        self.get_selected_datasets()

        # Keep the settings on the instance, as other methods rely on it.
        settings = self.selected_datasets[dataset_code]
        self.dataset_settings = settings

        metadata = settings['metadata']
        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings['name'],
                           doc_href=metadata['doc_href'],
                           last_update=settings['last_update'],
                           fetcher=self)

        dataset.series.data_iterator = EsriData(dataset, metadata['url'])

        return dataset.update_database()
Beispiel #37
0
    def upsert_dataset(self, dataset_code):
        """Create the dataset container, select its data-iterator class
        and persist the dataset.

        :param dataset_code: code of the dataset to upsert
        :returns: result of ``Datasets.update_database()``
        """
        settings = DATASETS[dataset_code]

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings['name'],
                           doc_href=settings['doc_href'],
                           fetcher=self)

        # Dedicated iterator class when registered; generic XML otherwise.
        if dataset_code in DATASETS_KLASS:
            iterator_klass = DATASETS_KLASS[dataset_code]
        else:
            iterator_klass = DATASETS_KLASS["XML"]
        dataset.series.data_iterator = iterator_klass(dataset)

        return dataset.update_database()
Beispiel #38
0
    def _common_tests(self):
        """Shared assertions: load the fixture file, upsert the provider and
        the dataset, then check the stored dataset, dimensions and series."""
        self._collections_is_empty()

        self.filepath = get_filepath(self.dataset_code)
        self.assertTrue(os.path.exists(self.filepath))

        # The provider document must be written before any dataset.
        self.fetcher.provider.update_database()
        provider = self.db[constants.COL_PROVIDERS].find_one(
            {"name": self.fetcher.provider_name})
        self.assertIsNotNone(provider)

        settings = DATASETS[self.dataset_code]

        dataset = Datasets(provider_name=self.fetcher.provider_name,
                           dataset_code=self.dataset_code,
                           name=settings["name"],
                           last_update=settings["last_update"],
                           fetcher=self.fetcher)

        # Wire the Esri iterator manually instead of going through upsert.
        fixture_iterator = esri.EsriData(dataset, make_url(self),
                                         filename=settings["filename"])
        dataset.series.data_iterator = fixture_iterator
        dataset.last_update = settings["last_update"]
        dataset.update_database()

        self.dataset = self.db[constants.COL_DATASETS].find_one(
            {"provider_name": self.fetcher.provider_name,
             "dataset_code": self.dataset_code})
        self.assertIsNotNone(self.dataset)

        dimensions = self.dataset["dimension_list"]
        self.assertEqual(len(dimensions), settings["dimension_count"])
        for concept in dimensions["concept"]:
            self.assertIn(concept[1], DATASETS["series_names"])

        series = self.db[constants.COL_SERIES].find(
            {"provider_name": self.fetcher.provider_name,
             "dataset_code": self.dataset_code})
        self.assertEqual(series.count(), settings["series_count"])
Beispiel #39
0
    def upsert_dataset(self, dataset_code):
        """Refresh the database for one of the selected ESRI datasets.

        :param dataset_code: code of the dataset to update
        :returns: result of ``Datasets.update_database()``
        """
        self.get_selected_datasets()

        # Stored on the instance: other methods read self.dataset_settings.
        self.dataset_settings = self.selected_datasets[dataset_code]
        settings = self.dataset_settings
        metadata = settings['metadata']

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings['name'],
                           doc_href=metadata['doc_href'],
                           last_update=settings['last_update'],
                           fetcher=self)

        dataset.series.data_iterator = EsriData(dataset, metadata['url'])

        return dataset.update_database()
Beispiel #40
0
    def upsert_dataset(self, dataset_code):
        """Upsert an INSEE dataset, handing any previously stored dataset
        document to INSEE_Data so it can work incrementally.

        :param dataset_code: code of the dataset to upsert
        :returns: result of ``Datasets.update_database()``
        """
        self._load_structure()

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=None,
                           doc_href=None,
                           last_update=clean_datetime(),
                           fetcher=self)

        # Previously stored version of this dataset (None on first run).
        previous_doc = self.db[constants.COL_DATASETS].find_one(
            {'provider_name': self.provider_name,
             "dataset_code": dataset_code})

        dataset.series.data_iterator = INSEE_Data(dataset,
                                                  dataset_doc=previous_doc)

        return dataset.update_database()
Beispiel #41
0
    def upsert_dataset(self, dataset_code):
        """Upsert *dataset_code*: Excel source when listed in DATASETS,
        otherwise the World Bank API.

        :param dataset_code: code of the dataset to upsert
        :returns: result of ``Datasets.update_database()``
        """
        self.get_selected_datasets()

        settings = self.selected_datasets[dataset_code]

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings["name"],
                           fetcher=self)

        if dataset_code in DATASETS:
            # Static, Excel-backed dataset.
            meta = DATASETS[dataset_code]
            dataset.series.data_iterator = ExcelData(dataset, meta["url"])
            dataset.doc_href = meta["doc_href"]
        else:
            # Dynamic dataset served by the World Bank API.
            dataset.last_update = clean_datetime()
            dataset.series.data_iterator = WorldBankAPIData(dataset, settings)

        return dataset.update_database()
Beispiel #42
0
    def upsert_dataset(self, dataset_code):
        """Upsert one BIS dataset, skipping the write entirely when the
        remote release date shows no change since the last load.

        :param dataset_code: code of the dataset to upsert
        :raises Exception: when *dataset_code* is not in DATASETS
        """
        start = time.time()

        logger.info("upsert dataset[%s] - START" % (dataset_code))

        if not DATASETS.get(dataset_code):
            raise Exception("This dataset is unknown" + dataset_code)

        meta = DATASETS[dataset_code]
        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=meta["name"],
                           doc_href=meta["doc_href"],
                           fetcher=self)

        fetcher_data = BIS_Data(dataset,
                                url=meta["url"],
                                filename=meta["filename"])

        if not fetcher_data.is_updated():
            # Nothing new upstream: log and bail out without touching the DB.
            logger.info(
                "upsert dataset[%s] bypass because is updated from release_date[%s]"
                % (dataset_code, fetcher_data.release_date)
            )
            return

        dataset.series.data_iterator = fetcher_data
        dataset.update_database()

        # TODO: clean datas (file temp)

        elapsed = time.time() - start
        logger.info("upsert dataset[%s] - END-BEFORE-METAS - time[%.3f seconds]" % (dataset_code, elapsed))

        self.update_metas(dataset_code)

        elapsed = time.time() - start
        logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, elapsed))
Beispiel #43
0
    def upsert_dataset(self, dataset_code):
        """Upsert one BIS dataset.

        :param dataset_code: code of the dataset to upsert
        :returns: result of ``Datasets.update_database()``
        :raises Exception: when *dataset_code* is not in DATASETS
        :raises errors.RejectUpdatedDataset: when the remote release date
            shows nothing new to load
        """
        if not DATASETS.get(dataset_code):
            raise Exception("This dataset is unknown" + dataset_code)

        meta = DATASETS[dataset_code]

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=meta['name'],
                           doc_href=meta['doc_href'],
                           fetcher=self)

        fetcher_data = BIS_Data(dataset,
                                url=meta['url'],
                                filename=meta['filename'],
                                frequency=meta['frequency'])

        # Reject early when upstream has published nothing new.
        if not fetcher_data.is_updated():
            comments = "update-date[%s]" % fetcher_data.release_date
            raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                              dataset_code=dataset_code,
                                              comments=comments)

        dataset.series.data_iterator = fetcher_data
        return dataset.update_database()