Example #1
0
    def test_process_series_data(self):
        # Push FakeDatas through Datasets.update_database(), then check that
        # every generated series is stored and carries a non-empty "tags"
        # field once tags.update_tags() has run on the series collection.
        #
        # nosetests -s -v dlstats.tests.fetchers.test__commons:DBSeriesTestCase.test_process_series_data

        self._collections_is_empty()

        provider_name = "p1"
        dataset_code = "d1"
        dataset_name = "d1 name"

        f = Fetcher(provider_name=provider_name,
                    db=self.db)

        d = Datasets(provider_name=provider_name,
                     dataset_code=dataset_code,
                     name=dataset_name,
                     last_update=datetime.now(),
                     doc_href="http://www.example.com",
                     fetcher=f,
                     is_load_previous_version=False)
        d.dimension_list.update_entry("Scale", "Billions", "Billions")
        d.dimension_list.update_entry("Country", "AFG", "AFG")

        s = Series(provider_name=f.provider_name,
                   dataset_code=dataset_code,
                   last_update=datetime(2013, 10, 28),
                   bulk_size=1,
                   fetcher=f)

        datas = FakeDatas(provider_name=provider_name,
                          dataset_code=dataset_code,
                          fetcher=f)
        s.data_iterator = datas

        # Swap in the hand-built Series before triggering the load.
        d.series = s
        d.update_database()

        # Count all series.
        self.assertEqual(self.db[constants.COL_SERIES].count(), datas.max_record)

        # Count series for this provider and dataset.
        series = self.db[constants.COL_SERIES].find({'provider_name': f.provider_name,
                                                     "dataset_code": dataset_code})
        self.assertEqual(series.count(), datas.max_record)

        tags.update_tags(self.db,
                         provider_name=f.provider_name, dataset_code=dataset_code,
                         col_name=constants.COL_SERIES)

        # Count series for this provider and dataset whose key is in datas.keys.
        series = self.db[constants.COL_SERIES].find({'provider_name': f.provider_name,
                                                     "dataset_code": dataset_code,
                                                     "key": {"$in": datas.keys}})

        self.assertEqual(series.count(), datas.max_record)

        for doc in series:
            # assertIn/assertGreater report the offending values on failure,
            # unlike assertTrue applied to an already-evaluated boolean.
            self.assertIn("tags", doc)
            self.assertGreater(len(doc['tags']), 0)
Example #2
0
    def fixtures(self):
        """Load a fake "eurostat" / "name_a" dataset into self.db and tag it.

        Registers Country, Scale, Currency and Sector dimension entries,
        feeds 10 FakeDatas records with randomly picked dimensions through
        Datasets.update_database(), then runs utils.update_tags on the
        datasets collection and on the series collection.
        """
        source = Fetcher(provider_name="p1", db=self.db)
        record_count = 10

        dataset = Datasets(provider_name="eurostat",
                           dataset_code="name_a",
                           name="Eurostat name_a",
                           last_update=datetime.now(),
                           doc_href="http://www.example.com",
                           fetcher=source,
                           is_load_previous_version=False)

        # (dimension, code, label) triples, registered in this exact order.
        dimension_entries = (
            ("Country", "FRA", "France"),
            ("Country", "AUS", "Australie"),
            ("Scale", "Billions", "Billions Dollars"),
            ("Scale", "Millions", "Millions Dollars"),
            ("Currency", "E", "Euro"),
            ("Currency", "D", "Dollars"),
            ("Sector", "agr", "Agriculture"),
            ("Sector", "ind", "Industrie"),
        )
        for dimension, code, label in dimension_entries:
            dataset.dimension_list.update_entry(dimension, code, label)

        def random_dimensions():
            # The first option of each list appears twice, so a uniform pick
            # favours it (presumably `choice` is random.choice - confirm).
            # Picks are made in Country, Sector, Currency order.
            country = choice(['FRA', 'AUS', 'FRA'])
            sector = choice(['agr', 'ind', 'agr'])
            currency = choice(['E', 'D', 'E'])
            return {'Country': country, 'Sector': sector, 'Currency': currency}

        dataset.series.data_iterator = FakeDatas(
            provider_name=dataset.provider_name,
            dataset_code=dataset.dataset_code,
            max_record=record_count,
            dimensions_generator=random_dimensions)
        dataset.update_database()

        # Tag the dataset document first, then its series.
        for collection_name in (constants.COL_DATASETS, constants.COL_SERIES):
            utils.update_tags(self.db,
                              provider_name=dataset.provider_name,
                              dataset_code=dataset.dataset_code,
                              col_name=collection_name,
                              max_bulk=20)
Example #3
0
    def test_update_tag(self):
        # Load a fake "eurostat" / "name_a" dataset, run utils.update_tags on
        # the datasets and series collections, then check the dataset's exact
        # tag list and that every stored series has at least one tag.
        #
        # nosetests -s -v dlstats.tests.test_search:DBTagsTestCase.test_update_tag

        max_record = 10

        d = Datasets(provider_name="eurostat",
                     dataset_code="name_a",
                     name="Eurostat name_a",
                     last_update=datetime.now(),
                     doc_href="http://www.example.com",
                     fetcher=self.fetcher,
                     is_load_previous_version=False)

        d.dimension_list.update_entry("Country", "FRA", "France")
        d.dimension_list.update_entry("Scale", "Billions", "Billions Dollars")

        datas = FakeDatas(provider_name=d.provider_name,
                          dataset_code=d.dataset_code,
                          max_record=max_record)
        d.series.data_iterator = datas
        # The returned value is used below as the dataset document's "_id".
        _id = d.update_database()

        utils.update_tags(self.db,
                          provider_name=d.provider_name,
                          dataset_code=d.dataset_code,
                          col_name=constants.COL_DATASETS,
                          max_bulk=20)

        utils.update_tags(self.db,
                          provider_name=d.provider_name,
                          dataset_code=d.dataset_code,
                          col_name=constants.COL_SERIES,
                          max_bulk=20)

        doc = self.db[constants.COL_DATASETS].find_one({"_id": _id})
        # The stored tag list is compared against the sorted expected tags.
        self.assertListEqual(doc['tags'], sorted(['eurostat', 'name_a', 'billions', 'dollars', 'france']))

        query = {'provider_name': d.provider_name, "dataset_code": d.dataset_code}
        series = self.db[constants.COL_SERIES].find(query)
        self.assertEqual(series.count(), max_record)

        for s in series:
            # assertGreater reports the actual length on failure, unlike
            # assertTrue applied to an already-evaluated comparison.
            self.assertGreater(len(s['tags']), 0)
Example #4
0
def cmd_update_tags(fetcher=None, dataset=None, collection=None, max_bulk=20,
                    aggregate=False, **kwargs):
    """Create or Update field tags"""

    # The usage examples below are a separate, unused string statement, so
    # the function's __doc__ stays the one-line summary above.
    """
    Examples:
    
    dlstats fetchers update-tags -f BIS -d CNFS -S -c ALL
    dlstats fetchers update-tags -f BEA -d "10101 Ann" -S -c datasets
    dlstats fetchers update-tags -f BEA -d "10101 Ann" -S -c series
    dlstats fetchers update-tags -f Eurostat -d nama_10_a10 -S -c datasets
    dlstats fetchers update-tags -f OECD -d MEI -S -c datasets
    
    dlstats fetchers update-tags -f BIS -d CNFS -S -c ALL --aggregate
    """

    # Remaining keyword arguments are handed to the client context unchanged.
    ctx = client.Context(**kwargs)
    ctx.log_ok("Run update tags for %s:" % fetcher)

    # In silent mode the prompt is skipped entirely (short-circuit `or`).
    confirmed = ctx.silent or click.confirm('Do you want to continue?', abort=True)
    if not confirmed:
        return

    db = ctx.mongo_database()

    # "ALL" expands to both collections; any other value is used as given.
    targets = (
        [constants.COL_DATASETS, constants.COL_SERIES]
        if collection == "ALL"
        else [collection]
    )

    for target in targets:
        # TODO: serie_key
        # TODO: accumulate results and report
        update_result = tags.update_tags(db,
                                         provider_name=fetcher,
                                         dataset_code=dataset,
                                         serie_key=None,
                                         col_name=target,
                                         max_bulk=max_bulk)

    if aggregate:
        # Datasets are aggregated before series; both results are
        # currently unused.
        datasets_result = tags.aggregate_tags_datasets(db, max_bulk=max_bulk)
        series_result = tags.aggregate_tags_series(db, max_bulk=max_bulk)