def test_process_series_data(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DBSeriesTestCase.test_process_series_data
    #
    # Loads the series produced by a FakeDatas iterator through
    # Datasets.update_database(), then checks the number of stored series
    # documents and that tags.update_tags() leaves a non-empty "tags" entry
    # on every one of them.

    self._collections_is_empty()

    provider_name = "p1"
    dataset_code = "d1"
    dataset_name = "d1 name"

    f = Fetcher(provider_name=provider_name, db=self.db)

    d = Datasets(provider_name=provider_name,
                 dataset_code=dataset_code,
                 name=dataset_name,
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("Scale", "Billions", "Billions")
    d.dimension_list.update_entry("Country", "AFG", "AFG")

    s = Series(provider_name=f.provider_name,
               dataset_code=dataset_code,
               last_update=datetime(2013, 10, 28),
               bulk_size=1,
               fetcher=f)

    datas = FakeDatas(provider_name=provider_name,
                      dataset_code=dataset_code,
                      fetcher=f)
    s.data_iterator = datas

    d.series = s
    d.update_database()

    series_collection = self.db[constants.COL_SERIES]

    # Count all series.
    self.assertEqual(series_collection.count(), datas.max_record)

    # Count series for this provider and dataset.
    series = series_collection.find({'provider_name': f.provider_name,
                                     "dataset_code": dataset_code})
    self.assertEqual(series.count(), datas.max_record)

    tags.update_tags(self.db,
                     provider_name=f.provider_name,
                     dataset_code=dataset_code,
                     col_name=constants.COL_SERIES)

    # Count series for this provider and dataset whose key is in keys[].
    series = series_collection.find({'provider_name': f.provider_name,
                                     "dataset_code": dataset_code,
                                     "key": {"$in": datas.keys}})
    self.assertEqual(series.count(), datas.max_record)

    # assertIn / assertGreater report the offending document content on
    # failure, unlike assertTrue on a pre-evaluated boolean.
    for doc in series:
        self.assertIn("tags", doc)
        self.assertGreater(len(doc['tags']), 0)
def test_revisions(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DBSeriesTestCase.test_revisions
    #
    # Loads the same dataset five times with increasing last_update dates
    # and checks, for the first series, the stored 'values', 'revisions',
    # 'release_dates', 'start_date' and 'end_date' after each load:
    #   initial load, then
    #   A. two existing values modified,
    #   B. two observations added at the beginning,
    #   C. one observation added at the end,
    #   D. one observation removed at each end.

    self._collections_is_empty()

    provider_name = "p1"
    dataset_code = "d1"
    dataset_name = "d1 name"

    f = Fetcher(provider_name=provider_name, db=self.db)

    d = Datasets(provider_name=provider_name,
                 dataset_code=dataset_code,
                 name=dataset_name,
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("Scale", "Billions", "Billions")
    d.dimension_list.update_entry("Country", "AFG", "AFG")

    series_collection = self.db[constants.COL_SERIES]

    def load(series, datas):
        # Attach the data iterator to the series and push it to the database.
        series.data_iterator = datas
        d.series = series
        d.update_database()

    def first_stored_series(datas):
        # Fetch the stored document matching the first key of `datas`.
        return series_collection.find_one({'key': datas.keys[0]})

    s1 = Series(provider_name=f.provider_name,
                dataset_code=dataset_code,
                last_update=datetime(2013, 4, 1),
                bulk_size=1,
                fetcher=f)
    datas1 = FakeDatas(provider_name=provider_name,
                       dataset_code=dataset_code,
                       fetcher=f)
    load(s1, datas1)

    def new_release(last_update, dates_from):
        # Build a fresh Series/FakeDatas pair that reuses the keys and
        # frequencies of the first load and the start/end dates of
        # `dates_from`, so that it updates the already stored series.
        series = Series(provider_name=f.provider_name,
                        dataset_code=dataset_code,
                        last_update=last_update,
                        bulk_size=1,
                        fetcher=f)
        datas = FakeDatas(provider_name=provider_name,
                          dataset_code=dataset_code,
                          fetcher=f)
        datas.keys = datas1.keys
        for i, r in enumerate(datas.rows):
            r['key'] = datas.keys[i]
            r['frequency'] = datas1.rows[i]['frequency']
            r['start_date'] = dates_from.rows[i]['start_date']
            r['end_date'] = dates_from.rows[i]['end_date']
        return series, datas

    # A. modifying existing values
    s2, datas2 = new_release(datetime(2014, 4, 1), datas1)
    # deepcopy so that editing the new values leaves datas1 untouched.
    datas2.rows[0]['values'] = deepcopy(datas1.rows[0]['values'])
    datas2.rows[0]['values'][1] = str(float(datas2.rows[0]['values'][1]) + 1.5)
    datas2.rows[0]['values'][8] = str(float(datas2.rows[0]['values'][8]) - 0.9)
    load(s2, datas2)

    self.assertEqual(series_collection.count(), datas1.max_record)

    test_series = first_stored_series(datas2)
    self.assertEqual(len(test_series['revisions']), 2)
    self.assertEqual(test_series['revisions']['1'],
                     [{'value': datas1.rows[0]['values'][1],
                       'release_date': s1.last_update}])
    self.assertEqual(test_series['revisions']['8'],
                     [{'value': datas1.rows[0]['values'][8],
                       'release_date': s1.last_update}])
    self.assertEqual(test_series['release_dates'][1], datetime(2014, 4, 1))
    self.assertEqual(test_series['release_dates'][8], datetime(2014, 4, 1))
    self.assertEqual(test_series['release_dates'][0], datetime(2013, 4, 1))
    self.assertEqual(test_series['release_dates'][2:8],
                     [datetime(2013, 4, 1)] * 6)
    self.assertEqual(test_series['start_date'], datas1.rows[0]['start_date'])
    self.assertEqual(test_series['end_date'], datas1.rows[0]['end_date'])

    # B. adding observations at the beginning of the series
    s3, datas3 = new_release(datetime(2014, 4, 1), datas1)
    datas3.rows[0]['start_date'] = datas1.rows[0]['start_date'] - 2
    datas3.rows[0]['values'] = ['10', '10'] + datas1.rows[0]['values']
    # Same two modifications as in A, shifted by the two prepended values.
    datas3.rows[0]['values'][3] = str(float(datas3.rows[0]['values'][3]) + 1.5)
    datas3.rows[0]['values'][10] = str(float(datas3.rows[0]['values'][10]) - 0.9)
    load(s3, datas3)

    self.assertEqual(series_collection.count(), datas1.max_record)

    test_series = first_stored_series(datas3)
    self.assertEqual(len(test_series['revisions']), 2)
    self.assertEqual(test_series['revisions']['3'],
                     [{'value': datas1.rows[0]['values'][1],
                       'release_date': s1.last_update}])
    self.assertEqual(test_series['revisions']['10'],
                     [{'value': datas1.rows[0]['values'][8],
                       'release_date': s1.last_update}])
    self.assertEqual(len(test_series['release_dates']),
                     len(test_series['values']))
    self.assertEqual(test_series['release_dates'][3], datetime(2014, 4, 1))
    self.assertEqual(test_series['release_dates'][10], datetime(2014, 4, 1))
    self.assertEqual(test_series['release_dates'][0:2],
                     [datetime(2014, 4, 1)] * 2)
    self.assertEqual(test_series['release_dates'][2], datetime(2013, 4, 1))
    self.assertEqual(test_series['release_dates'][4:10],
                     [datetime(2013, 4, 1)] * 6)
    self.assertEqual(len(test_series['values']), 11)
    self.assertEqual(len(test_series['release_dates']), 11)
    self.assertEqual(test_series['start_date'],
                     datas2.rows[0]['start_date'] - 2)
    self.assertEqual(test_series['end_date'], datas2.rows[0]['end_date'])

    # C. adding observations at the end of the series
    s4, datas4 = new_release(datetime(2014, 5, 1), datas3)
    datas4.rows[0]['end_date'] = datas3.rows[0]['end_date'] + 1
    datas4.rows[0]['values'] = datas3.rows[0]['values'] + ['1.0']
    load(s4, datas4)

    self.assertEqual(series_collection.count(), datas1.max_record)

    test_series = first_stored_series(datas4)
    self.assertEqual(len(test_series['revisions']), 2)
    self.assertEqual(len(test_series['values']), 12)
    self.assertEqual(test_series['values'][11], '1.0')
    self.assertEqual(len(test_series['release_dates']), 12)
    self.assertEqual(test_series['release_dates'][11], datetime(2014, 5, 1))
    self.assertEqual(test_series['start_date'], datas3.rows[0]['start_date'])
    self.assertEqual(test_series['end_date'],
                     datas3.rows[0]['end_date'] + 1)

    # D. removing observations at the beginning and the end of the series
    s5, datas5 = new_release(datetime(2014, 6, 1), datas4)
    datas5.rows[0]['start_date'] = datas4.rows[0]['start_date'] + 1
    datas5.rows[0]['end_date'] = datas4.rows[0]['end_date'] - 1
    datas5.rows[0]['values'] = datas4.rows[0]['values'][1:-1]
    load(s5, datas5)

    self.assertEqual(series_collection.count(), datas1.max_record)

    test_series = first_stored_series(datas5)
    self.assertEqual(len(test_series['revisions']), 4)
    # The stored series keeps its 12 slots; the removed ends read 'na'.
    self.assertEqual(len(test_series['values']), 12)
    self.assertEqual(test_series['values'][0], 'na')
    self.assertEqual(test_series['values'][1], datas4.rows[0]['values'][1])
    self.assertEqual(test_series['values'][10], datas4.rows[0]['values'][-2])
    self.assertEqual(test_series['values'][11], 'na')
    self.assertEqual(test_series['release_dates'][0], datetime(2014, 6, 1))
    self.assertEqual(test_series['release_dates'][1], datetime(2014, 4, 1))
    self.assertEqual(test_series['release_dates'][10], datetime(2014, 4, 1))
    self.assertEqual(test_series['release_dates'][11], datetime(2014, 6, 1))
    self.assertEqual(test_series['start_date'], datas4.rows[0]['start_date'])
    self.assertEqual(test_series['end_date'], datas4.rows[0]['end_date'])