def test_process_series_data(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DBSeriesTestCase.test_process_series_data
    """Load a fake dataset into an empty DB, then verify the stored series
    counts and that every series document received non-empty tags."""
    self._collections_is_empty()

    provider_name = "p1"
    dataset_code = "d1"
    dataset_name = "d1 name"

    f = Fetcher(provider_name=provider_name, db=self.db)

    d = Datasets(provider_name=provider_name,
                 dataset_code=dataset_code,
                 name=dataset_name,
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("Scale", "Billions", "Billions")
    d.dimension_list.update_entry("Country", "AFG", "AFG")

    s = Series(provider_name=f.provider_name,
               dataset_code=dataset_code,
               last_update=datetime(2013, 10, 28),
               bulk_size=1,
               fetcher=f)

    datas = FakeDatas(provider_name=provider_name,
                      dataset_code=dataset_code,
                      fetcher=f)
    s.data_iterator = datas

    d.series = s
    d.update_database()

    # Count all series in the collection.
    self.assertEqual(self.db[constants.COL_SERIES].count(), datas.max_record)

    # Count series for this provider and dataset.
    series = self.db[constants.COL_SERIES].find(
        {'provider_name': f.provider_name,
         "dataset_code": dataset_code})
    self.assertEqual(series.count(), datas.max_record)

    tags.update_tags(self.db,
                     provider_name=f.provider_name,
                     dataset_code=dataset_code,
                     col_name=constants.COL_SERIES)

    # Count series for this provider and dataset, restricted to datas.keys.
    series = self.db[constants.COL_SERIES].find(
        {'provider_name': f.provider_name,
         "dataset_code": dataset_code,
         "key": {"$in": datas.keys}})
    self.assertEqual(series.count(), datas.max_record)

    # Every stored series must carry a non-empty tags list.
    for doc in series:
        self.assertTrue("tags" in doc)
        self.assertTrue(len(doc['tags']) > 0)
def fixtures(self):
    """Populate the test database with one fake 'eurostat' dataset, its
    randomly-dimensioned series, and the tags for both collections."""
    fetcher = Fetcher(provider_name="p1", db=self.db)
    # NOTE(review): the fetcher's provider "p1" differs from the dataset's
    # provider "eurostat" below — confirm this mismatch is intentional.

    max_record = 10

    d = Datasets(provider_name="eurostat",
                 dataset_code="name_a",
                 name="Eurostat name_a",
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=fetcher,
                 is_load_previous_version=False)

    d.dimension_list.update_entry("Country", "FRA", "France")
    d.dimension_list.update_entry("Country", "AUS", "Australie")
    d.dimension_list.update_entry("Scale", "Billions", "Billions Dollars")
    d.dimension_list.update_entry("Scale", "Millions", "Millions Dollars")
    d.dimension_list.update_entry("Currency", "E", "Euro")
    d.dimension_list.update_entry("Currency", "D", "Dollars")
    d.dimension_list.update_entry("Sector", "agr", "Agriculture")
    d.dimension_list.update_entry("Sector", "ind", "Industrie")

    def dimensions_generator():
        # The duplicated first entry weights the random draw toward it.
        return {
            'Country': choice(['FRA', 'AUS', 'FRA']),
            'Sector': choice(['agr', 'ind', 'agr']),
            'Currency': choice(['E', 'D', 'E'])
        }

    datas = FakeDatas(provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      max_record=max_record,
                      dimensions_generator=dimensions_generator)
    d.series.data_iterator = datas

    _id = d.update_database()

    # Build tags for both the dataset document and the series documents.
    utils.update_tags(self.db,
                      provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      col_name=constants.COL_DATASETS,
                      max_bulk=20)
    utils.update_tags(self.db,
                      provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      col_name=constants.COL_SERIES,
                      max_bulk=20)
def test_update_tag(self):
    # nosetests -s -v dlstats.tests.test_search:DBTagsTestCase.test_update_tag
    """Create a fake dataset and series, run update_tags on both collections,
    and check the dataset's exact tag list plus non-empty tags per series."""
    max_record = 10

    d = Datasets(provider_name="eurostat",
                 dataset_code="name_a",
                 name="Eurostat name_a",
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=self.fetcher,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("Country", "FRA", "France")
    d.dimension_list.update_entry("Scale", "Billions", "Billions Dollars")

    datas = FakeDatas(provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      max_record=max_record)
    d.series.data_iterator = datas

    _id = d.update_database()

    utils.update_tags(self.db,
                      provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      col_name=constants.COL_DATASETS,
                      max_bulk=20)
    utils.update_tags(self.db,
                      provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      col_name=constants.COL_SERIES,
                      max_bulk=20)

    # The dataset document must carry exactly these tags, sorted.
    doc = self.db[constants.COL_DATASETS].find_one({"_id": _id})
    self.assertListEqual(
        doc['tags'],
        sorted(['eurostat', 'name_a', 'billions', 'dollars', 'france']))

    query = {'provider_name': d.provider_name,
             "dataset_code": d.dataset_code}
    series = self.db[constants.COL_SERIES].find(query)
    self.assertEqual(series.count(), max_record)

    # Every series document must have received at least one tag.
    for s in series:
        self.assertTrue(len(s['tags']) > 0)
def cmd_update_tags(fetcher=None, dataset=None, collection=None, max_bulk=20,
                    aggregate=False, **kwargs):
    """Create or Update field tags.

    Runs tags.update_tags over the selected collection(s) for the given
    provider/dataset; optionally aggregates tags afterwards.

    :param fetcher: provider name passed through as ``provider_name``
    :param dataset: dataset code to restrict the update to
    :param collection: target collection name, or ``"ALL"`` for both
        datasets and series
    :param max_bulk: bulk size forwarded to the tag helpers
    :param aggregate: when True, also run the dataset/series tag aggregations
    :param kwargs: forwarded to :class:`client.Context`

    Examples::

        dlstats fetchers update-tags -f BIS -d CNFS -S -c ALL
        dlstats fetchers update-tags -f BEA -d "10101 Ann" -S -c datasets
        dlstats fetchers update-tags -f BEA -d "10101 Ann" -S -c series
        dlstats fetchers update-tags -f Eurostat -d nama_10_a10 -S -c datasets
        dlstats fetchers update-tags -f OECD -d MEI -S -c datasets
        dlstats fetchers update-tags -f BIS -d CNFS -S -c ALL --aggregate
    """
    # Fix: the examples above were previously a second, dead string-expression
    # statement after the docstring; they are now part of the docstring.
    ctx = client.Context(**kwargs)

    ctx.log_ok("Run update tags for %s:" % fetcher)

    # In silent mode skip the prompt; otherwise click.confirm aborts on "no".
    if ctx.silent or click.confirm('Do you want to continue?', abort=True):

        db = ctx.mongo_database()

        # "ALL" expands to both tag-bearing collections.
        if collection == "ALL":
            cols = [constants.COL_DATASETS, constants.COL_SERIES]
        else:
            cols = [collection]

        for col in cols:
            # TODO: support filtering by serie_key
            # TODO: accumulate results and produce a report
            tags.update_tags(db,
                             provider_name=fetcher,
                             dataset_code=dataset,
                             serie_key=None,
                             col_name=col,
                             max_bulk=max_bulk)

        if aggregate:
            # TODO: report aggregation results
            tags.aggregate_tags_datasets(db, max_bulk=max_bulk)
            tags.aggregate_tags_series(db, max_bulk=max_bulk)