def test_process_series_data(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DBSeriesTestCase.test_process_series_data
    """End-to-end check of series loading: every series produced by the
    fake iterator is stored, retrievable by provider/dataset/key, and
    gets a non-empty tags list after update_tags()."""

    self._collections_is_empty()

    provider_name = "p1"
    dataset_code = "d1"
    dataset_name = "d1 name"

    f = Fetcher(provider_name=provider_name,
                db=self.db)

    d = Datasets(provider_name=provider_name,
                 dataset_code=dataset_code,
                 name=dataset_name,
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("Scale", "Billions", "Billions")
    d.dimension_list.update_entry("Country", "AFG", "AFG")

    # bulk_size=1 forces a write per series instead of batching.
    s = Series(provider_name=f.provider_name,
               dataset_code=dataset_code,
               last_update=datetime(2013, 10, 28),
               bulk_size=1,
               fetcher=f)

    datas = FakeDatas(provider_name=provider_name,
                      dataset_code=dataset_code,
                      fetcher=f)
    s.data_iterator = datas

    d.series = s
    d.update_database()

    '''Count All series'''
    self.assertEqual(self.db[constants.COL_SERIES].count(), datas.max_record)

    '''Count series for this provider and dataset'''
    series = self.db[constants.COL_SERIES].find({'provider_name': f.provider_name,
                                                 "dataset_code": dataset_code})
    self.assertEqual(series.count(), datas.max_record)

    tags.update_tags(self.db,
                     provider_name=f.provider_name,
                     dataset_code=dataset_code,
                     col_name=constants.COL_SERIES)

    '''Count series for this provider and dataset and in keys[]'''
    series = self.db[constants.COL_SERIES].find({'provider_name': f.provider_name,
                                                 "dataset_code": dataset_code,
                                                 "key": {"$in": datas.keys}})
    self.assertEqual(series.count(), datas.max_record)

    # Every stored series must carry a non-empty tags list.
    for doc in series:
        self.assertTrue("tags" in doc)
        self.assertTrue(len(doc['tags']) > 0)
def upsert_dataset(self, dataset_code):
    """Create or refresh one selected Eurostat dataset in the database.

    Raises errors.RejectUpdatedDataset when the stored copy is already
    at least as recent as the source's advertised last_update.
    """
    self.get_selected_datasets()

    stored = self.db[constants.COL_DATASETS].find_one(
        {'provider_name': self.provider_name,
         'dataset_code': dataset_code},
        {'dataset_code': 1, 'last_update': 1})

    settings = self.selected_datasets[dataset_code]

    # Skip the expensive load when nothing changed upstream.
    if stored and stored['last_update'] >= settings['last_update']:
        raise errors.RejectUpdatedDataset(
            provider_name=self.provider_name,
            dataset_code=dataset_code,
            comments="update-date[%s]" % stored['last_update'])

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings["name"],
                       doc_href=settings["metadata"].get("doc_href"),
                       last_update=settings["last_update"],
                       fetcher=self)
    dataset.series.data_iterator = EurostatData(dataset)
    return dataset.update_database()
def test_constructor(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DatasetTestCase.test_constructor
    """Datasets constructor: rejects missing mandatory arguments, wires
    up series and code dictionaries, and exposes fields via .bson."""

    # Constructing without mandatory arguments must be rejected.
    with self.assertRaises(ValueError):
        Datasets(is_load_previous_version=False)

    f = Fetcher(provider_name="p1",
                is_indexes=False)

    d = Datasets(provider_name="p1",
                 dataset_code="d1",
                 name="d1 Name",
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("country", "country", "country")

    self.assertTrue(isinstance(d.series, Series))
    self.assertTrue(isinstance(d.dimension_list, CodeDict))
    self.assertTrue(isinstance(d.attribute_list, CodeDict))

    bson = d.bson
    self.assertEqual(bson['provider_name'], "p1")
    self.assertEqual(bson["dataset_code"], "d1")
    self.assertEqual(bson["name"], "d1 Name")
    self.assertEqual(bson["doc_href"], "http://www.example.com")
    self.assertTrue(isinstance(bson["dimension_list"], dict))
    self.assertTrue(isinstance(bson["attribute_list"], dict))
    # last_update was not supplied, so the bson view must report None.
    self.assertIsNone(bson["last_update"])
    self.assertEqual(bson["slug"], "p1-d1")

    #TODO: last_update
    d.last_update = datetime.now()
def test_unique_constraint(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DBDatasetTestCase.test_unique_constraint
    """A second document with the same (provider_name, dataset_code) pair
    must violate the unique index on the datasets collection."""

    self._collections_is_empty()

    f = Fetcher(provider_name="p1",
                db=self.db)

    d = Datasets(provider_name="p1",
                 dataset_code="d1",
                 name="d1 Name",
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("Country", "AFG", "AFG")
    d.dimension_list.update_entry("Scale", "Billions", "Billions")

    datas = FakeDatas(provider_name="p1",
                      dataset_code="d1",
                      fetcher=f)
    d.series.data_iterator = datas

    result = d.update_database()
    self.assertIsNotNone(result)

    self.assertEqual(self.db[constants.COL_DATASETS].count(), 1)

    # A direct duplicate insert must be refused by MongoDB's unique index.
    with self.assertRaises(DuplicateKeyError):
        existing_dataset = dict(provider_name="p1", dataset_code="d1")
        self.db[constants.COL_DATASETS].insert(existing_dataset)
def upsert_dataset(self, dataset_code):
    """Build the FED dataset for dataset_code and load its series,
    logging elapsed time at start and end."""
    started = time.time()
    logger.info("upsert dataset[%s] - START" % (dataset_code))

    # TODO: check whether the dataset already exists and only update it

    meta = DATASETS[dataset_code]
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=meta['name'],
                       doc_href=meta['doc_href'],
                       last_update=datetime.now(),
                       fetcher=self)

    iterator = FED_Data(dataset=dataset, url=meta['url'])
    dataset.series.data_iterator = iterator
    result = dataset.update_database()
    iterator = None  # drop the reference once the load is done

    elapsed = time.time() - started
    logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, elapsed))

    return result
def upsert_dataset(self, dataset_code):
    """Updates data in Database for selected datasets.

    Raises errors.RejectUpdatedDataset when the stored dataset is
    already at least as recent as the source.
    """
    self.get_selected_datasets()

    doc = self.db[constants.COL_DATASETS].find_one(
        {
            'provider_name': self.provider_name,
            'dataset_code': dataset_code
        }, {
            'dataset_code': 1,
            'last_update': 1
        })

    dataset_settings = self.selected_datasets[dataset_code]

    # Bypass the load entirely when the stored copy is up to date.
    if doc and doc['last_update'] >= dataset_settings['last_update']:
        comments = "update-date[%s]" % doc['last_update']
        raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                          dataset_code=dataset_code,
                                          comments=comments)

    # NOTE(review): last_update is passed as None and assigned afterwards —
    # presumably to keep the Datasets constructor from overriding it with a
    # previously stored value; confirm against Datasets.__init__.
    dataset = Datasets(
        provider_name=self.provider_name,
        dataset_code=dataset_code,
        name=dataset_settings["name"],
        doc_href=dataset_settings["metadata"].get("doc_href"),
        last_update=None,
        fetcher=self)

    dataset.last_update = dataset_settings["last_update"]
    dataset.series.data_iterator = EurostatData(dataset)
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load one ECB dataset and its series, logging elapsed time.

    Returns whatever dataset.update_database() returns; exceptions
    propagate to the caller unchanged.
    """
    start = time.time()
    logger.info("upsert dataset[%s] - START" % (dataset_code))

    # TODO: check whether the dataset already exists and only update it

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=None,
                       doc_href=self.provider.website,
                       last_update=datetime.now(),
                       fetcher=self)

    _data = ECB_Data(dataset=dataset)
    dataset.series.data_iterator = _data
    # Fix: the original wrapped this call in "try: ... except: raise",
    # which is a no-op (a bare re-raise changes nothing) and hides intent;
    # call directly and let exceptions propagate.
    result = dataset.update_database()
    _data = None  # release the iterator reference

    end = time.time() - start
    logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))

    return result
def upsert_dataset(self, dataset_code):
    """Load the DESTATIS dataset identified by dataset_code, timing the run."""
    started = time.time()
    logger.info("upsert dataset[%s] - START" % (dataset_code))

    # TODO: check whether the dataset already exists and only update it

    meta = DATASETS[dataset_code]
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=meta['name'],
                       doc_href=meta['doc_href'],
                       last_update=datetime.now(),
                       fetcher=self)

    iterator = DESTATIS_Data(dataset=dataset,
                             ns_tag_data=meta["ns_tag_data"])
    dataset.series.data_iterator = iterator
    result = dataset.update_database()
    iterator = None  # drop the reference once the load is done

    elapsed = time.time() - started
    logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, elapsed))

    return result
def upsert_dataset(self, dataset_code):
    """Load one BEA dataset from its Excel sheet.

    Raises errors.RejectUpdatedDataset when the update is rejected by the
    release-date check below (unless self.force_update is set).
    """
    settings = self._get_datasets_settings()[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings["name"],
                       doc_href='http://www.bea.gov',
                       fetcher=self)

    url = settings["metadata"]["url"]
    filename = settings["metadata"]["filename"]
    sheet_name = settings["metadata"]["sheet_name"]
    sheet = self._get_sheet(url, filename, sheet_name)
    fetcher_data = BeaData(dataset, url=url, sheet=sheet)

    # NOTE(review): rejecting when release_date >= stored last_update looks
    # inverted compared with sibling fetchers (which reject when the source
    # is NOT newer) — verify the intended semantics before relying on it.
    if dataset.last_update and fetcher_data.release_date >= dataset.last_update and not self.force_update:
        comments = "update-date[%s]" % fetcher_data.release_date
        raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                          dataset_code=dataset_code,
                                          comments=comments)

    dataset.last_update = fetcher_data.release_date
    dataset.series.data_iterator = fetcher_data
    return dataset.update_database()
def upsert_dataset(self, dataset_code, datas=None):
    """Load one OECD dataset and then refresh its metadata.

    Raises Exception when dataset_code is not registered in DATASETS.
    """
    start = time.time()
    logger.info("upsert dataset[%s] - START" % (dataset_code))

    if not DATASETS.get(dataset_code):
        # Bug fix: the original concatenated the code straight onto the
        # message, producing "This dataset is unknownCODE".
        raise Exception("This dataset is unknown: %s" % dataset_code)

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=DATASETS[dataset_code]['name'],
                       doc_href=DATASETS[dataset_code]['doc_href'],
                       fetcher=self)

    fetcher_data = OECD_Data(dataset)
    dataset.series.data_iterator = fetcher_data
    dataset.update_database()

    end = time.time() - start
    logger.info("upsert dataset[%s] - END-BEFORE-METAS - time[%.3f seconds]" % (dataset_code, end))

    self.update_metas(dataset_code)

    end = time.time() - start
    logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))
def test_not_recordable_dataset(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DBDatasetTestCase.test_not_recordable_dataset
    """A dataset whose series iterator yields nothing must not be
    recorded: update_database() returns None and no document is stored."""

    self._collections_is_empty()

    f = Fetcher(provider_name="p1",
                db=self.db)

    d = Datasets(provider_name="p1",
                 dataset_code="d1",
                 name="d1 Name",
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("Scale", "Billions", "Billions")
    d.dimension_list.update_entry("country", "AFG", "AFG")

    class EmptySeriesIterator():
        # Fix: implement the full iterator protocol — the original only
        # defined __next__, so iter() on this object would raise TypeError.
        def __iter__(self):
            return self

        def __next__(self):
            raise StopIteration

    datas = EmptySeriesIterator()
    d.series.data_iterator = datas

    # Renamed from "id" to avoid shadowing the builtin.
    dataset_id = d.update_database()
    self.assertIsNone(dataset_id)

    self.assertEqual(self.db[constants.COL_DATASETS].count(), 0)
def upsert_dataset(self, dataset_code):
    """Create or refresh one Banque de France dataset and its series."""
    self.get_selected_datasets()
    self.dataset_settings = self.selected_datasets[dataset_code]

    settings = self.dataset_settings
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings["name"],
                       last_update=settings['last_update'],
                       fetcher=self)

    source_url = settings['metadata']['url']
    dataset.series.data_iterator = BDF_Data(dataset, source_url)
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load one dataset selected from the BDF catalog."""
    self.get_selected_datasets()
    self.dataset_settings = self.selected_datasets[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=self.dataset_settings["name"],
                       last_update=self.dataset_settings['last_update'],
                       fetcher=self)

    dataset.series.data_iterator = BDF_Data(
        dataset, self.dataset_settings['metadata']['url'])
    return dataset.update_database()
def upsert_gem(self, dataset_code):
    """Load a Global Economic Monitor dataset; its last_update is taken
    from the source's release date."""
    entry = DATASETS[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=entry['name'],
                       doc_href=entry['doc_href'],
                       fetcher=self)

    gem_data = GemData(dataset, entry['url'])
    dataset.last_update = gem_data.release_date
    dataset.series.data_iterator = gem_data
    dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load the dummy dataset used by tests and examples."""
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name="My Dataset Name",
                       last_update=clean_datetime(),
                       fetcher=self)

    dataset.series.data_iterator = DUMMY_Data(dataset)
    return dataset.update_database()
def load_datasets_update(self):
    """Walk the provider agenda and reload every dataset retained by
    self.datasets_filter that has a scheduled update."""
    for entry in self._parse_agenda():
        if entry['dataflow_key'] not in self.datasets_filter:
            continue

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=entry['dataflow_key'],
                           name=entry['name'],
                           last_update=entry['last_update'],
                           fetcher=self)
        dataset.series.data_iterator = BDF_Data(dataset, entry['url'])
        dataset.update_database()

        logger.info(
            "get update - provider[%s] - dataset[%s] - last-update-dataset[%s]"
            % (self.provider_name, entry['dataflow_key'], entry['last_update']))
def upsert_dataset(self, dataset_code):
    """Load one ECB dataset; SDMX structure metadata is fetched first."""
    self._load_structure()

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=None,
                       doc_href=self.provider.website,
                       fetcher=self)
    dataset.last_update = utils.clean_datetime()

    dataset.series.data_iterator = ECB_Data(dataset=dataset)
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Refresh a single ECB dataset in the database."""
    self._load_structure()

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=None,
                       doc_href=self.provider.website,
                       last_update=utils.clean_datetime(),
                       fetcher=self)

    ecb_data = ECB_Data(dataset=dataset)
    dataset.series.data_iterator = ecb_data
    return dataset.update_database()
def _common_tests(self):
    """Shared BIS fixture: mock the dataset download URL, create the
    provider and data tree, load the dataset, then check stored
    dataset/series counts."""
    self._collections_is_empty()

    url = DATASETS[self.dataset_code]['url']

    self.filepath = get_filepath(self.dataset_code)
    self.assertTrue(os.path.exists(self.filepath))

    # Serve the local fixture file instead of hitting the network.
    httpretty.register_uri(httpretty.GET,
                           url,
                           body=mock_streaming(self.filepath),
                           status=200,
                           content_type='application/octet-stream;charset=UTF-8',
                           streaming=True)

    # provider.update_database
    self.fetcher.provider.update_database()
    provider = self.db[constants.COL_PROVIDERS].find_one({"name": self.fetcher.provider_name})
    self.assertIsNotNone(provider)

    # upsert_data_tree
    self.fetcher.upsert_data_tree()
    provider = self.db[constants.COL_PROVIDERS].find_one({"name": self.fetcher.provider_name})
    self.assertIsNotNone(provider['data_tree'])

    dataset = Datasets(provider_name=self.fetcher.provider_name,
                       dataset_code=self.dataset_code,
                       name=DATASETS[self.dataset_code]['name'],
                       doc_href=DATASETS[self.dataset_code]['doc_href'],
                       fetcher=self.fetcher)

    fetcher_data = bis.BIS_Data(dataset,
                                url=url,
                                filename=DATASETS[self.dataset_code]['filename'],
                                store_filepath=os.path.dirname(self.filepath))

    dataset.series.data_iterator = fetcher_data
    dataset.update_database()

    self.dataset = self.db[constants.COL_DATASETS].find_one({'provider_name': self.fetcher.provider_name,
                                                             "dataset_code": self.dataset_code})
    self.assertIsNotNone(self.dataset)

    # Dimension count must match the per-dataset expectation.
    self.assertEqual(len(self.dataset["dimension_list"]),
                     DATASETS[self.dataset_code]["dimensions_count"])

    series = self.db[constants.COL_SERIES].find({'provider_name': self.fetcher.provider_name,
                                                 "dataset_code": self.dataset_code})
    self.assertEqual(series.count(), SERIES_COUNT)
def fixtures(self):
    """Build a small Eurostat-like dataset with randomized dimensions and
    refresh dataset/series tags; shared fixture for search tests."""
    fetcher = Fetcher(provider_name="p1",
                      db=self.db)

    max_record = 10

    d = Datasets(provider_name="eurostat",
                 dataset_code="name_a",
                 name="Eurostat name_a",
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=fetcher,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("Country", "FRA", "France")
    d.dimension_list.update_entry("Country", "AUS", "Australie")
    d.dimension_list.update_entry("Scale", "Billions", "Billions Dollars")
    d.dimension_list.update_entry("Scale", "Millions", "Millions Dollars")
    d.dimension_list.update_entry("Currency", "E", "Euro")
    d.dimension_list.update_entry("Currency", "D", "Dollars")
    d.dimension_list.update_entry("Sector", "agr", "Agriculture")
    d.dimension_list.update_entry("Sector", "ind", "Industrie")

    def dimensions_generator():
        # Duplicated candidates bias choice() toward FRA / agr / E.
        return {
            'Country': choice(['FRA', 'AUS', 'FRA']),
            'Sector': choice(['agr', 'ind', 'agr']),
            'Currency': choice(['E', 'D', 'E'])
        }

    datas = FakeDatas(provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      max_record=max_record,
                      dimensions_generator=dimensions_generator)
    d.series.data_iterator = datas
    _id = d.update_database()

    utils.update_tags(self.db,
                      provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      col_name=constants.COL_DATASETS,
                      max_bulk=20)

    utils.update_tags(self.db,
                      provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      col_name=constants.COL_SERIES,
                      max_bulk=20)
def upsert_dataset(self, dataset_code):
    """Load the dummy dataset, pre-seeding minimal code lists."""
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name="My Dataset Name",
                       last_update=clean_datetime(),
                       fetcher=self)

    # Minimal COUNTRY/OBS_STATUS code lists for lookups.
    dataset.codelists = {'COUNTRY': {'FRA': 'France'},
                         'OBS_STATUS': {'A': "A"}}

    dataset.series.data_iterator = DUMMY_Data(dataset)
    return dataset.update_database()
def load_datasets_update(self):
    """Re-run every agenda dataset retained by self.datasets_filter."""
    template = "get update - provider[%s] - dataset[%s] - last-update-dataset[%s]"

    for item in self._parse_agenda():
        key = item['dataflow_key']
        if key in self.datasets_filter:
            dataset = Datasets(provider_name=self.provider_name,
                               dataset_code=key,
                               name=item['name'],
                               last_update=item['last_update'],
                               fetcher=self)
            dataset.series.data_iterator = BDF_Data(dataset, item['url'])
            dataset.update_database()

            logger.info(template % (self.provider_name, key, item['last_update']))
def upsert_dataset(self, dataset_code):
    """Load one INSEE dataset after refreshing dataflow and concept
    structures."""
    self._load_structure_dataflows()
    self._load_structure_concepts()

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=None,
                       doc_href=None,
                       fetcher=self)
    dataset.last_update = clean_datetime()

    dataset.series.data_iterator = INSEE_Data(dataset)
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load one World Bank dataset through the API."""
    self.get_selected_datasets()
    settings = self.selected_datasets[dataset_code]

    # Example indicator page:
    # http://data.worldbank.org/indicator/AG.AGR.TRAC.NO
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings["name"],
                       last_update=clean_datetime(),
                       fetcher=self)

    dataset.series.data_iterator = WorldBankAPIData(dataset, settings)
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load one OECD dataset registered in DATASETS.

    Raises Exception for unknown dataset codes.
    """
    if not DATASETS.get(dataset_code):
        # Bug fix: the original concatenated the code straight onto the
        # message, producing "This dataset is unknownCODE".
        raise Exception("This dataset is unknown: %s" % dataset_code)

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=DATASETS[dataset_code]['name'],
                       doc_href=DATASETS[dataset_code]['doc_href'],
                       fetcher=self)
    dataset.last_update = clean_datetime()

    dataset.series.data_iterator = OECD_Data(
        dataset, sdmx_filter=DATASETS[dataset_code]['sdmx_filter'])
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load one OECD dataset, applying its registered SDMX filter.

    Raises Exception when dataset_code is not in DATASETS.
    """
    if not DATASETS.get(dataset_code):
        # Bug fix: add the missing separator between message and code.
        raise Exception("This dataset is unknown: %s" % dataset_code)

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=DATASETS[dataset_code]['name'],
                       doc_href=DATASETS[dataset_code]['doc_href'],
                       last_update=clean_datetime(),
                       fetcher=self)

    dataset.series.data_iterator = OECD_Data(
        dataset, sdmx_filter=DATASETS[dataset_code]['sdmx_filter'])
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load one INSEE dataset, timing the run.

    The previously stored dataset document (if any) is handed to
    INSEE_Data — presumably for change detection; confirm against
    INSEE_Data's implementation.
    """
    start = time.time()
    logger.info("upsert dataset[%s] - START" % (dataset_code))

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       doc_href=None,
                       last_update=datetime.now(),  #TODO: use the source's real date
                       fetcher=self)

    dataset_doc = self.db[constants.COL_DATASETS].find_one({'provider_name': self.provider_name,
                                                            "dataset_code": dataset_code})

    insee_data = INSEE_Data(dataset=dataset,
                            dataset_doc=dataset_doc,
                            )
    dataset.series.data_iterator = insee_data
    result = dataset.update_database()

    end = time.time() - start
    logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))

    """
    > IDBANK: A définir dynamiquement sur site ?
    doc_href d'une serie: http://www.bdm.insee.fr/bdm2/affichageSeries?idbank=001694226

    > CODE GROUPE: Balance des Paiements mensuelle - Compte de capital
    http://www.bdm.insee.fr/bdm2/choixCriteres?codeGroupe=1556
    """

    return result
def upsert_dataset(self, dataset_code):
    """Load one World Bank dataset — from Excel when a static source is
    registered in DATASETS, otherwise through the API."""
    self.get_selected_datasets()
    settings = self.selected_datasets[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings["name"],
                       fetcher=self)

    if dataset_code in DATASETS:
        # Static Excel source: doc_href comes from the registry.
        dataset.series.data_iterator = ExcelData(dataset,
                                                 DATASETS[dataset_code]["url"])
        dataset.doc_href = DATASETS[dataset_code]["doc_href"]
    else:
        dataset.last_update = clean_datetime()
        dataset.series.data_iterator = WorldBankAPIData(dataset, settings)

    return dataset.update_database()
def test_update_database(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DBDatasetTestCase.test_update_database
    """update_database() must persist the dataset document plus all of
    its series, and return the dataset's ObjectId."""

    self._collections_is_empty()

    f = Fetcher(provider_name="p1",
                db=self.db)

    d = Datasets(provider_name="p1",
                 dataset_code="d1",
                 name="d1 Name",
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("Scale", "Billions", "Billions")
    d.dimension_list.update_entry("country", "AFG", "AFG")

    datas = FakeDatas(provider_name="p1",
                      dataset_code="d1",
                      fetcher=f)
    d.series.data_iterator = datas

    id = d.update_database()
    self.assertIsNotNone(id)
    self.assertIsInstance(id, ObjectId)

    self.db[constants.COL_DATASETS].find_one({'_id': ObjectId(id)})

    bson = self.db[constants.COL_DATASETS].find_one({'provider_name': "p1",
                                                     "dataset_code": "d1"})
    self.assertIsNotNone(bson)

    self.assertEqual(bson['provider_name'], "p1")
    self.assertEqual(bson["dataset_code"], "d1")
    self.assertEqual(bson["name"], "d1 Name")
    self.assertEqual(bson["doc_href"], "http://www.example.com")
    self.assertTrue(isinstance(bson["dimension_list"], dict))
    self.assertTrue(isinstance(bson["attribute_list"], dict))

    # One stored series per record produced by the fake iterator.
    series = self.db[constants.COL_SERIES].find({'provider_name': f.provider_name,
                                                 "dataset_code": d.dataset_code})
    self.assertEqual(series.count(), datas.max_record)
def test_update_tag(self):
    # nosetests -s -v dlstats.tests.test_search:DBTagsTestCase.test_update_tag
    """update_tags() must attach a sorted tag list to the dataset
    document and a non-empty tags list to every series."""

    max_record = 10

    d = Datasets(provider_name="eurostat",
                 dataset_code="name_a",
                 name="Eurostat name_a",
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=self.fetcher,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("Country", "FRA", "France")
    d.dimension_list.update_entry("Scale", "Billions", "Billions Dollars")

    datas = FakeDatas(provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      max_record=max_record)
    d.series.data_iterator = datas
    _id = d.update_database()

    utils.update_tags(self.db,
                      provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      col_name=constants.COL_DATASETS,
                      max_bulk=20)

    utils.update_tags(self.db,
                      provider_name=d.provider_name,
                      dataset_code=d.dataset_code,
                      col_name=constants.COL_SERIES,
                      max_bulk=20)

    doc = self.db[constants.COL_DATASETS].find_one({"_id": _id})
    # Tags are compared sorted: storage order must be alphabetical.
    self.assertListEqual(doc['tags'],
                         sorted(['eurostat', 'name_a', 'billions', 'dollars', 'france']))

    query = {'provider_name': d.provider_name, "dataset_code": d.dataset_code}
    series = self.db[constants.COL_SERIES].find(query)
    self.assertEqual(series.count(), max_record)
    for s in series:
        self.assertTrue(len(s['tags']) > 0)
def upsert_dataset(self, dataset_code):
    """Load one dataset, picking the specialized iterator class registered
    for the code and falling back to the generic XML iterator."""
    settings = DATASETS[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings['name'],
                       doc_href=settings['doc_href'],
                       fetcher=self)

    # Idiom fix: dict.get() with a default replaces the dead
    # "klass = None" initialization followed by an if/else.
    klass = DATASETS_KLASS.get(dataset_code, DATASETS_KLASS["XML"])

    dataset.series.data_iterator = klass(dataset)
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load the dummy dataset with minimal COUNTRY/OBS_STATUS code lists."""
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name="My Dataset Name",
                       last_update=clean_datetime(),
                       fetcher=self)

    codelists = {}
    codelists['COUNTRY'] = {'FRA': 'France'}
    codelists['OBS_STATUS'] = {'A': "A"}
    dataset.codelists = codelists

    fetcher_data = DUMMY_Data(dataset)
    dataset.series.data_iterator = fetcher_data
    return dataset.update_database()
def upsert_weo_issue(self, url, dataset_code):
    """Load one WEO issue from *url* into the dataset dataset_code.

    The dataset's last_update is driven by the issue's release date.
    """
    settings = DATASETS[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings['name'],
                       doc_href=settings['doc_href'],
                       fetcher=self)

    weo_data = WeoData(dataset, url)
    dataset.last_update = weo_data.release_date
    # 'e' flag marks estimated observations.
    dataset.attribute_list.update_entry('flags', 'e', 'Estimated')
    dataset.series.data_iterator = weo_data

    try:
        dataset.update_database()
        self.update_metas(dataset_code)
    except Exception as err:
        # NOTE(review): errors are logged and swallowed, not re-raised —
        # presumably best-effort per issue; confirm this is intentional.
        logger.error(str(err))
def upsert_dataset(self, dataset_code):
    """Updates data in Database for selected datasets

    :dset: dataset_code
    :returns: None"""
    self.get_selected_datasets()
    self.dataset_settings = self.selected_datasets[dataset_code]

    settings = self.dataset_settings
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings['name'],
                       doc_href=settings['metadata']['doc_href'],
                       last_update=settings['last_update'],
                       fetcher=self)

    dataset.series.data_iterator = EsriData(dataset,
                                            settings['metadata']['url'])
    return dataset.update_database()
def _common_tests(self):
    """Shared ESRI fixture: create the provider, load the dataset from a
    local fixture file and verify dimension and series counts."""
    self._collections_is_empty()

    self.filepath = get_filepath(self.dataset_code)
    self.assertTrue(os.path.exists(self.filepath))

    # provider.update_database
    self.fetcher.provider.update_database()
    provider = self.db[constants.COL_PROVIDERS].find_one({"name": self.fetcher.provider_name})
    self.assertIsNotNone(provider)

    dataset = Datasets(
        provider_name=self.fetcher.provider_name,
        dataset_code=self.dataset_code,
        name=DATASETS[self.dataset_code]["name"],
        last_update=DATASETS[self.dataset_code]["last_update"],
        fetcher=self.fetcher,
    )

    # manual Data for iterator
    fetcher_data = esri.EsriData(dataset,
                                 make_url(self),
                                 filename=DATASETS[self.dataset_code]["filename"])
    dataset.series.data_iterator = fetcher_data
    dataset.last_update = DATASETS[self.dataset_code]["last_update"]
    dataset.update_database()

    self.dataset = self.db[constants.COL_DATASETS].find_one(
        {"provider_name": self.fetcher.provider_name, "dataset_code": self.dataset_code}
    )
    self.assertIsNotNone(self.dataset)

    dimensions = self.dataset["dimension_list"]
    self.assertEqual(len(dimensions), DATASETS[self.dataset_code]["dimension_count"])

    # Each concept's label must be one of the expected series names.
    for c in dimensions["concept"]:
        self.assertIn(c[1], DATASETS["series_names"])

    series = self.db[constants.COL_SERIES].find(
        {"provider_name": self.fetcher.provider_name, "dataset_code": self.dataset_code}
    )
    self.assertEqual(series.count(), DATASETS[self.dataset_code]["series_count"])
def upsert_dataset(self, dataset_code):
    """Load one ESRI dataset selected from the catalog."""
    self.get_selected_datasets()
    self.dataset_settings = self.selected_datasets[dataset_code]

    meta = self.dataset_settings['metadata']
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=self.dataset_settings['name'],
                       doc_href=meta['doc_href'],
                       last_update=self.dataset_settings['last_update'],
                       fetcher=self)

    dataset.series.data_iterator = EsriData(dataset, meta['url'])
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load one INSEE dataset, passing any previously stored dataset
    document through to the data iterator."""
    self._load_structure()

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=None,
                       doc_href=None,
                       last_update=clean_datetime(),
                       fetcher=self)

    previous_doc = self.db[constants.COL_DATASETS].find_one(
        {'provider_name': self.provider_name,
         "dataset_code": dataset_code})

    dataset.series.data_iterator = INSEE_Data(dataset,
                                              dataset_doc=previous_doc)
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load a World Bank dataset from Excel (static registry) or the API."""
    self.get_selected_datasets()
    dataset_settings = self.selected_datasets[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=dataset_settings["name"],
                       fetcher=self)

    is_excel_source = dataset_code in DATASETS
    if is_excel_source:
        registry = DATASETS[dataset_code]
        dataset.series.data_iterator = ExcelData(dataset, registry["url"])
        dataset.doc_href = registry["doc_href"]
    else:
        dataset.last_update = clean_datetime()
        dataset.series.data_iterator = WorldBankAPIData(dataset,
                                                        dataset_settings)

    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Download and load one BIS dataset when the source reports an
    update; otherwise just log the bypass.

    Raises Exception for dataset codes missing from DATASETS.
    """
    start = time.time()
    logger.info("upsert dataset[%s] - START" % (dataset_code))

    if not DATASETS.get(dataset_code):
        # Bug fix: the original concatenated the code straight onto the
        # message, producing "This dataset is unknownCODE".
        raise Exception("This dataset is unknown: %s" % dataset_code)

    dataset = Datasets(
        provider_name=self.provider_name,
        dataset_code=dataset_code,
        name=DATASETS[dataset_code]["name"],
        doc_href=DATASETS[dataset_code]["doc_href"],
        fetcher=self,
    )

    fetcher_data = BIS_Data(dataset,
                            url=DATASETS[dataset_code]["url"],
                            filename=DATASETS[dataset_code]["filename"])

    if fetcher_data.is_updated():
        dataset.series.data_iterator = fetcher_data
        dataset.update_database()

        # TODO: clean datas (file temp)

        end = time.time() - start
        logger.info("upsert dataset[%s] - END-BEFORE-METAS - time[%.3f seconds]" % (dataset_code, end))

        self.update_metas(dataset_code)

        end = time.time() - start
        logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))
    else:
        logger.info(
            "upsert dataset[%s] bypass because is updated from release_date[%s]"
            % (dataset_code, fetcher_data.release_date)
        )
def upsert_dataset(self, dataset_code):
    """Load one BIS dataset; reject when the source has no newer release.

    Raises:
        Exception: unknown dataset_code.
        errors.RejectUpdatedDataset: source not updated since last load.
    """
    if not DATASETS.get(dataset_code):
        # Bug fix: add the missing separator between message and code.
        raise Exception("This dataset is unknown: %s" % dataset_code)

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=DATASETS[dataset_code]['name'],
                       doc_href=DATASETS[dataset_code]['doc_href'],
                       fetcher=self)

    fetcher_data = BIS_Data(dataset,
                            url=DATASETS[dataset_code]['url'],
                            filename=DATASETS[dataset_code]['filename'],
                            frequency=DATASETS[dataset_code]['frequency'])

    if fetcher_data.is_updated():
        dataset.series.data_iterator = fetcher_data
        return dataset.update_database()

    comments = "update-date[%s]" % fetcher_data.release_date
    raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                      dataset_code=dataset_code,
                                      comments=comments)