def upsert_dataset(self, dataset_code):
    """Updates data in Database for selected datasets.

    Raises RejectUpdatedDataset when the stored document is already
    at least as recent as the selected dataset's last_update.
    """
    self.get_selected_datasets()

    settings = self.selected_datasets[dataset_code]

    # Fetch only the fields needed for the freshness check.
    query = {'provider_name': self.provider_name,
             'dataset_code': dataset_code}
    projection = {'dataset_code': 1, 'last_update': 1}
    doc = self.db[constants.COL_DATASETS].find_one(query, projection)

    if doc and doc['last_update'] >= settings['last_update']:
        comments = "update-date[%s]" % doc['last_update']
        raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                          dataset_code=dataset_code,
                                          comments=comments)

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings["name"],
                       doc_href=settings["metadata"].get("doc_href"),
                       last_update=None,
                       fetcher=self)
    dataset.last_update = settings["last_update"]
    dataset.series.data_iterator = EurostatData(dataset)

    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Build and store one BEA dataset read from an Excel sheet.

    Raises RejectUpdatedDataset when the release date is not newer
    than the stored last_update (unless force_update is set).
    """
    settings = self._get_datasets_settings()[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings["name"],
                       doc_href='http://www.bea.gov',
                       fetcher=self)

    meta = settings["metadata"]
    url = meta["url"]
    sheet = self._get_sheet(url, meta["filename"], meta["sheet_name"])
    fetcher_data = BeaData(dataset, url=url, sheet=sheet)

    already_current = (dataset.last_update
                       and fetcher_data.release_date >= dataset.last_update
                       and not self.force_update)
    if already_current:
        comments = "update-date[%s]" % fetcher_data.release_date
        raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                          dataset_code=dataset_code,
                                          comments=comments)

    dataset.last_update = fetcher_data.release_date
    dataset.series.data_iterator = fetcher_data

    return dataset.update_database()
def test_constructor(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DatasetTestCase.test_constructor

    # A Datasets built without a fetcher must be rejected.
    with self.assertRaises(ValueError):
        Datasets(is_load_previous_version=False)

    f = Fetcher(provider_name="p1", is_indexes=False)
    d = Datasets(provider_name="p1",
                 dataset_code="d1",
                 name="d1 Name",
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("country", "country", "country")

    # Component types.
    self.assertTrue(isinstance(d.series, Series))
    self.assertTrue(isinstance(d.dimension_list, CodeDict))
    self.assertTrue(isinstance(d.attribute_list, CodeDict))

    # BSON projection of the dataset.
    bson = d.bson
    expected = {
        "provider_name": "p1",
        "dataset_code": "d1",
        "name": "d1 Name",
        "doc_href": "http://www.example.com",
        "slug": "p1-d1",
    }
    for key, value in expected.items():
        self.assertEqual(bson[key], value)
    self.assertTrue(isinstance(bson["dimension_list"], dict))
    self.assertTrue(isinstance(bson["attribute_list"], dict))
    self.assertIsNone(bson["last_update"])

    #TODO: last_update
    d.last_update = datetime.now()
def upsert_dataset(self, dataset_code):
    """Updates data in Database for selected datasets.

    Rejects the update when the stored document's last_update is
    already >= the selected dataset's last_update.
    """
    self.get_selected_datasets()

    stored = self.db[constants.COL_DATASETS].find_one(
        {"provider_name": self.provider_name, "dataset_code": dataset_code},
        {"dataset_code": 1, "last_update": 1},
    )
    dataset_settings = self.selected_datasets[dataset_code]

    if stored:
        if stored["last_update"] >= dataset_settings["last_update"]:
            raise errors.RejectUpdatedDataset(
                provider_name=self.provider_name,
                dataset_code=dataset_code,
                comments="update-date[%s]" % stored["last_update"],
            )

    dataset = Datasets(
        provider_name=self.provider_name,
        dataset_code=dataset_code,
        name=dataset_settings["name"],
        doc_href=dataset_settings["metadata"].get("doc_href"),
        last_update=None,
        fetcher=self,
    )
    dataset.last_update = dataset_settings["last_update"]
    dataset.series.data_iterator = EurostatData(dataset)

    return dataset.update_database()
def upsert_sna(self, url, dataset_code):
    """Update the database for one ESRI SNA dataset.

    :param url: source URL of the dataset file
    :param dataset_code: code of the dataset to update
    :returns: result of dataset.update_database()
    """
    dataset = Datasets(self.provider_name, dataset_code, fetcher=self)
    sna_data = EsriData(dataset, url)

    dataset.name = self.dataset_name[dataset_code]
    dataset.doc_href = 'http://www.esri.cao.go.jp/index-e.html'
    dataset.last_update = sna_data.release_date
    dataset.series.data_iterator = sna_data

    # FIX: return the result instead of discarding it, consistent
    # with the other upsert_* methods in this project.
    return dataset.update_database()
def upsert_gem(self, dataset_code):
    """Update the database for one GEM dataset.

    :param dataset_code: code of the dataset to update (key of DATASETS)
    :returns: result of dataset.update_database()
    """
    d = DATASETS[dataset_code]
    url = d['url']

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=d['name'],
                       doc_href=d['doc_href'],
                       fetcher=self)

    gem_data = GemData(dataset, url)
    dataset.last_update = gem_data.release_date
    dataset.series.data_iterator = gem_data

    # FIX: return the result instead of discarding it, consistent
    # with the other upsert_* methods in this project.
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load the SDMX structure, then refresh one ECB dataset in the database."""
    self._load_structure()

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=None,
                       doc_href=self.provider.website,
                       fetcher=self)
    dataset.last_update = utils.clean_datetime()
    dataset.series.data_iterator = ECB_Data(dataset=dataset)

    return dataset.update_database()
def upsert_dataset(self, dataset_code, sheet):
    """Update the database for one BEA dataset sheet, logging elapsed time.

    :param dataset_code: code of the dataset to update
    :param sheet: spreadsheet object holding the dataset's series
    :returns: result of dataset.update_database()
    """
    start = time.time()
    logger.info("upsert dataset[%s] - START" % (dataset_code))

    dataset = Datasets(self.provider_name, dataset_code, fetcher=self)
    bea_data = BeaData(dataset, self.url, sheet)

    dataset.name = dataset_code
    dataset.doc_href = 'http://www.bea.gov/newsreleases/national/gdp/gdpnewsrelease.htm'
    dataset.last_update = bea_data.release_date
    dataset.series.data_iterator = bea_data

    # FIX: capture the result so it can be returned (the original
    # discarded it; sibling upsert_* methods return it).
    result = dataset.update_database()

    self.update_metas(dataset_code)

    end = time.time() - start
    logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))
    return result
def upsert_dataset(self, dataset_code):
    """Refresh one ECB dataset: load structure, attach iterator, persist."""
    self._load_structure()

    dataset = Datasets(
        provider_name=self.provider_name,
        dataset_code=dataset_code,
        name=None,
        doc_href=self.provider.website,
        fetcher=self,
    )
    dataset.last_update = utils.clean_datetime()

    _data = ECB_Data(dataset=dataset)
    dataset.series.data_iterator = _data
    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Update the database for one OECD dataset.

    :param dataset_code: code of the dataset (must be a key of DATASETS)
    :raises Exception: if dataset_code is not declared in DATASETS
    :returns: result of dataset.update_database()
    """
    if not DATASETS.get(dataset_code):
        # BUG FIX: original concatenated the code straight onto the
        # message ("This dataset is unknownCODE"); add a separator.
        raise Exception("This dataset is unknown: %s" % dataset_code)

    settings = DATASETS[dataset_code]
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings['name'],
                       doc_href=settings['doc_href'],
                       fetcher=self)
    dataset.last_update = clean_datetime()
    dataset.series.data_iterator = OECD_Data(
        dataset, sdmx_filter=settings['sdmx_filter'])

    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Load INSEE dataflows and concepts, then refresh one dataset."""
    self._load_structure_dataflows()
    self._load_structure_concepts()

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=None,
                       doc_href=None,
                       fetcher=self)
    dataset.last_update = clean_datetime()
    dataset.series.data_iterator = INSEE_Data(dataset)

    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Update the database for one OECD dataset.

    :param dataset_code: code of the dataset (must be a key of DATASETS)
    :raises Exception: if dataset_code is not declared in DATASETS
    :returns: result of dataset.update_database()
    """
    if not DATASETS.get(dataset_code):
        # BUG FIX: original built "This dataset is unknownCODE" with no
        # separator between message and code; add one.
        raise Exception("This dataset is unknown: %s" % dataset_code)

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=DATASETS[dataset_code]['name'],
                       doc_href=DATASETS[dataset_code]['doc_href'],
                       fetcher=self)
    dataset.last_update = clean_datetime()
    dataset.series.data_iterator = OECD_Data(
        dataset, sdmx_filter=DATASETS[dataset_code]['sdmx_filter'])

    return dataset.update_database()
def upsert_dataset(self, dataset_code):
    """Refresh one World Bank dataset, via a static Excel source or the API."""
    self.get_selected_datasets()
    settings = self.selected_datasets[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings["name"],
                       fetcher=self)

    if dataset_code in DATASETS:
        # Excel-backed dataset: iterator and doc_href come from the
        # static DATASETS table. NOTE(review): last_update is not set in
        # this branch — presumably ExcelData supplies it; confirm.
        dataset.series.data_iterator = ExcelData(dataset,
                                                 DATASETS[dataset_code]["url"])
        dataset.doc_href = DATASETS[dataset_code]["doc_href"]
    else:
        # API-backed dataset.
        dataset.last_update = clean_datetime()
        dataset.series.data_iterator = WorldBankAPIData(dataset, settings)

    return dataset.update_database()
def upsert_weo_issue(self, url, dataset_code):
    """Update the database for one WEO issue.

    Database-update errors are logged (with traceback) and swallowed so
    one bad issue does not abort the whole run.

    :param url: source URL of the WEO issue file
    :param dataset_code: code of the dataset (key of DATASETS)
    """
    settings = DATASETS[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings['name'],
                       doc_href=settings['doc_href'],
                       fetcher=self)

    weo_data = WeoData(dataset, url)
    dataset.last_update = weo_data.release_date
    dataset.attribute_list.update_entry('flags', 'e', 'Estimated')
    dataset.series.data_iterator = weo_data

    try:
        dataset.update_database()
        self.update_metas(dataset_code)
    except Exception as err:
        # FIX: logger.exception preserves the traceback;
        # logger.error(str(err)) dropped the stack information.
        logger.exception(str(err))
def _common_tests(self):
    """Shared assertions: run one dataset end-to-end and verify DB state."""
    self._collections_is_empty()

    self.filepath = get_filepath(self.dataset_code)
    self.assertTrue(os.path.exists(self.filepath))

    # provider.update_database
    self.fetcher.provider.update_database()
    provider = self.db[constants.COL_PROVIDERS].find_one(
        {"name": self.fetcher.provider_name})
    self.assertIsNotNone(provider)

    meta = DATASETS[self.dataset_code]
    dataset = Datasets(provider_name=self.fetcher.provider_name,
                       dataset_code=self.dataset_code,
                       name=meta["name"],
                       last_update=meta["last_update"],
                       fetcher=self.fetcher)

    # manual Data for iterator
    fetcher_data = esri.EsriData(dataset,
                                 make_url(self),
                                 filename=meta["filename"])
    dataset.series.data_iterator = fetcher_data
    dataset.last_update = meta["last_update"]
    dataset.update_database()

    query = {"provider_name": self.fetcher.provider_name,
             "dataset_code": self.dataset_code}
    self.dataset = self.db[constants.COL_DATASETS].find_one(query)
    self.assertIsNotNone(self.dataset)

    dimensions = self.dataset["dimension_list"]
    self.assertEqual(len(dimensions), meta["dimension_count"])
    for entry in dimensions["concept"]:
        self.assertIn(entry[1], DATASETS["series_names"])

    series = self.db[constants.COL_SERIES].find(query)
    self.assertEqual(series.count(), meta["series_count"])
def upsert_dataset(self, dataset_code):
    """Updates data in Database for selected datasets

    :param dataset_code: code of the dataset to update
    :returns: result of dataset.update_database()
    """
    self.get_selected_datasets()

    start = time.time()
    logger.info("upsert dataset[%s] - START" % (dataset_code))

    self.dataset_settings = self.selected_datasets[dataset_code]
    url = self.make_url()

    dataset = Datasets(self.provider_name, dataset_code, fetcher=self)
    dataset.name = self.dataset_settings['name']
    dataset.doc_href = self.dataset_settings['metadata']['doc_href']
    dataset.last_update = self.dataset_settings['last_update']
    dataset.series.data_iterator = EsriData(dataset, url, filename=dataset_code)

    # FIX: capture and return the result (the original discarded it;
    # sibling upsert_* methods return dataset.update_database()).
    result = dataset.update_database()

    end = time.time() - start
    logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))
    return result
def upsert_dataset(self, dataset_code):
    """Refresh one World Bank dataset from Excel (static table) or the API."""
    self.get_selected_datasets()
    dataset_settings = self.selected_datasets[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=dataset_settings["name"],
                       fetcher=self)

    excel_entry = DATASETS.get(dataset_code)
    if excel_entry is not None and dataset_code in DATASETS:
        # Static Excel source declared in DATASETS.
        dataset.series.data_iterator = ExcelData(dataset, excel_entry["url"])
        dataset.doc_href = excel_entry["doc_href"]
    else:
        # Fall back to the World Bank API.
        dataset.last_update = clean_datetime()
        dataset.series.data_iterator = WorldBankAPIData(dataset,
                                                        dataset_settings)

    return dataset.update_database()