Example #1
0
    def test_process_series_data(self):
        """Store the series of a fake dataset, then check counts and tags.

        A ``Datasets`` object fed by a ``FakeDatas`` iterator is written with
        ``update_database()``. The test then verifies that the series
        collection holds exactly ``datas.max_record`` documents (overall, for
        this provider/dataset, and restricted to the generated keys) and that,
        after ``tags.update_tags``, every one of them has a non-empty
        ``tags`` field.
        """
        # nosetests -s -v dlstats.tests.fetchers.test__commons:DBSeriesTestCase.test_process_series_data

        # Precondition helper of the test case (presumably asserts that the
        # collections start out empty -- defined outside this block).
        self._collections_is_empty()

        provider_name = "p1"
        dataset_code = "d1"
        dataset_name = "d1 name"

        f = Fetcher(provider_name=provider_name,
                    db=self.db)

        d = Datasets(provider_name=provider_name,
                     dataset_code=dataset_code,
                     name=dataset_name,
                     last_update=datetime.now(),
                     doc_href="http://www.example.com",
                     fetcher=f,
                     is_load_previous_version=False)
        d.dimension_list.update_entry("Scale", "Billions", "Billions")
        d.dimension_list.update_entry("Country", "AFG", "AFG")

        s = Series(provider_name=f.provider_name,
                   dataset_code=dataset_code,
                   last_update=datetime(2013, 10, 28),
                   bulk_size=1,
                   fetcher=f)

        datas = FakeDatas(provider_name=provider_name,
                          dataset_code=dataset_code,
                          fetcher=f)
        s.data_iterator = datas

        d.series = s
        d.update_database()

        series_collection = self.db[constants.COL_SERIES]
        dataset_filter = {'provider_name': f.provider_name,
                          "dataset_code": dataset_code}

        # Count all series
        self.assertEqual(series_collection.count(), datas.max_record)

        # Count series for this provider and dataset
        series = series_collection.find(dict(dataset_filter))
        self.assertEqual(series.count(), datas.max_record)

        tags.update_tags(self.db,
                         provider_name=f.provider_name,
                         dataset_code=dataset_code,
                         col_name=constants.COL_SERIES)

        # Count series for this provider and dataset and in keys[]
        series = series_collection.find(dict(dataset_filter,
                                             key={"$in": datas.keys}))
        self.assertEqual(series.count(), datas.max_record)

        # assertIn/assertGreater report the offending document on failure,
        # unlike assertTrue(<bool expression>).
        for doc in series:
            self.assertIn("tags", doc)
            self.assertGreater(len(doc['tags']), 0)
 def test_full_example(self):
     """Build a Series with every keyword given, including one revision."""
     release_dates = [
         datetime(2013, 11, 28),
         datetime(2014, 12, 28),
         datetime(2015, 1, 28),
         datetime(2015, 2, 28)
     ]
     quarterly_index = pandas.period_range('1/1999', periods=72, freq='Q')
     observation_attributes = {'name': 'OBS_VALUE', 'value': 'p'}
     single_revision = {
         'value': 2710,
         'position': 2,
         'releaseDates': [datetime(2014, 11, 28)]
     }
     seasonal_adjustment = {'name': 'Seasonal adjustment', 'value': 'wda'}

     gdp_series = Series(provider='Test provider',
                         name='GDP in France',
                         key='GDP_FR',
                         datasetCode='nama_gdp_fr',
                         values=[2700, 2720, 2740, 2760],
                         releaseDates=release_dates,
                         period_index=quarterly_index,
                         attributes=observation_attributes,
                         revisions=[single_revision],
                         dimensions=[seasonal_adjustment])

     # Construction succeeding and yielding a Series is all that is checked.
     self.assertIsInstance(gdp_series, Series)
 def test_empty_revisions(self):
     """Build a Series without passing ``revisions`` and check its type."""
     # Same shape of arguments as a full construction, minus 'revisions'.
     series_kwargs = dict(
         provider='Test provider',
         name='GDP in Germany',
         key='GDP_DE',
         datasetCode='nama_gdp_de',
         values=[2700, 2720, 2740, 2760],
         releaseDates=[
             datetime(2013, 11, 28),
             datetime(2014, 12, 28),
             datetime(2015, 1, 28),
             datetime(2015, 2, 28)
         ],
         period_index=pandas.period_range('1/1999',
                                          periods=72,
                                          freq='Q'),
         attributes={
             'name': 'OBS_VALUE',
             'value': 'p'
         },
         dimensions=[{
             'name': 'Seasonal adjustment',
             'value': 'wda'
         }],
     )
     built = Series(**series_kwargs)
     self.assertIsInstance(built, Series)
Example #4
0
    def test_revisions(self):
        """Check how successive updates of one dataset are stored as revisions.

        After an initial load, the same series keys are updated four times:

        A. two existing values are modified;
        B. two observations are added at the beginning;
        C. one observation is added at the end;
        D. the first and the last observation are removed.

        After each update the stored document of the first key is checked
        for its ``revisions``, ``values``, ``release_dates``, ``start_date``
        and ``end_date`` fields, and the series collection must still contain
        exactly ``datas1.max_record`` documents.
        """
        # nosetests -s -v dlstats.tests.fetchers.test__commons:DBSeriesTestCase.test_revisions

        self._collections_is_empty()

        provider_name = "p1"
        dataset_code = "d1"
        dataset_name = "d1 name"

        f = Fetcher(provider_name=provider_name,
                    db=self.db)

        d = Datasets(provider_name=provider_name,
                     dataset_code=dataset_code,
                     name=dataset_name,
                     last_update=datetime.now(),
                     doc_href="http://www.example.com",
                     fetcher=f,
                     is_load_previous_version=False)
        d.dimension_list.update_entry("Scale", "Billions", "Billions")
        d.dimension_list.update_entry("Country", "AFG", "AFG")

        def new_update(last_update):
            """Return a fresh (Series, FakeDatas) pair for one update pass."""
            series = Series(provider_name=f.provider_name,
                            dataset_code=dataset_code,
                            last_update=last_update,
                            bulk_size=1,
                            fetcher=f)
            datas = FakeDatas(provider_name=provider_name,
                              dataset_code=dataset_code,
                              fetcher=f)
            return series, datas

        def push(series, datas):
            """Attach ``datas`` to ``series`` and write it through the dataset."""
            series.data_iterator = datas
            d.series = series
            d.update_database()

        # Initial load
        s1, datas1 = new_update(datetime(2013, 4, 1))
        push(s1, datas1)

        def align_rows(datas, dates_from):
            """Make ``datas`` describe the same series as the initial load.

            Keys and frequencies come from ``datas1``; start/end dates are
            copied row by row from ``dates_from``.
            """
            datas.keys = datas1.keys
            for i, r in enumerate(datas.rows):
                r['key'] = datas.keys[i]
                r['frequency'] = datas1.rows[i]['frequency']
                r['start_date'] = dates_from.rows[i]['start_date']
                r['end_date'] = dates_from.rows[i]['end_date']

        # A. modifying existing values
        s2, datas2 = new_update(datetime(2014, 4, 1))
        align_rows(datas2, dates_from=datas1)
        # deepcopy so that editing the new values leaves datas1 untouched;
        # datas1 is the reference for the expected revisions below.
        datas2.rows[0]['values'] = deepcopy(datas1.rows[0]['values'])
        datas2.rows[0]['values'][1] = str(float(datas2.rows[0]['values'][1]) + 1.5)
        datas2.rows[0]['values'][8] = str(float(datas2.rows[0]['values'][8]) - 0.9)
        push(s2, datas2)

        self.assertEqual(self.db[constants.COL_SERIES].count(), datas1.max_record)
        test_key = datas2.keys[0]
        test_series = self.db[constants.COL_SERIES].find_one({'key': test_key})
        self.assertEqual(len(test_series['revisions']), 2)
        self.assertEqual(test_series['revisions']['1'], [{'value': datas1.rows[0]['values'][1], 'release_date': s1.last_update}])
        self.assertEqual(test_series['revisions']['8'], [{'value': datas1.rows[0]['values'][8], 'release_date': s1.last_update}])
        self.assertEqual(test_series['release_dates'][1], datetime(2014, 4, 1))
        self.assertEqual(test_series['release_dates'][8], datetime(2014, 4, 1))
        self.assertEqual(test_series['release_dates'][0], datetime(2013, 4, 1))
        self.assertEqual(test_series['release_dates'][2:8], [datetime(2013, 4, 1)] * 6)
        self.assertEqual(test_series['start_date'], datas1.rows[0]['start_date'])
        self.assertEqual(test_series['end_date'], datas1.rows[0]['end_date'])

        # B. adding observations at the beginning of the series
        s3, datas3 = new_update(datetime(2014, 4, 1))
        align_rows(datas3, dates_from=datas1)
        datas3.rows[0]['start_date'] = datas1.rows[0]['start_date'] - 2
        datas3.rows[0]['values'] = ['10', '10'] + datas1.rows[0]['values']
        # Same two modifications as in A, shifted by the two new leading values.
        datas3.rows[0]['values'][3] = str(float(datas3.rows[0]['values'][3]) + 1.5)
        datas3.rows[0]['values'][10] = str(float(datas3.rows[0]['values'][10]) - 0.9)
        push(s3, datas3)

        self.assertEqual(self.db[constants.COL_SERIES].count(), datas1.max_record)
        test_key = datas3.keys[0]
        test_series = self.db[constants.COL_SERIES].find_one({'key': test_key})
        self.assertEqual(len(test_series['revisions']), 2)
        self.assertEqual(test_series['revisions']['3'], [{'value': datas1.rows[0]['values'][1], 'release_date': s1.last_update}])
        self.assertEqual(test_series['revisions']['10'], [{'value': datas1.rows[0]['values'][8], 'release_date': s1.last_update}])
        self.assertEqual(len(test_series['release_dates']), len(test_series['values']))
        self.assertEqual(test_series['release_dates'][3], datetime(2014, 4, 1))
        self.assertEqual(test_series['release_dates'][10], datetime(2014, 4, 1))
        self.assertEqual(test_series['release_dates'][0:2], [datetime(2014, 4, 1)] * 2)
        self.assertEqual(test_series['release_dates'][2], datetime(2013, 4, 1))
        self.assertEqual(test_series['release_dates'][4:10], [datetime(2013, 4, 1)] * 6)
        self.assertEqual(len(test_series['values']), 11)
        self.assertEqual(len(test_series['release_dates']), 11)
        self.assertEqual(test_series['start_date'], datas2.rows[0]['start_date'] - 2)
        self.assertEqual(test_series['end_date'], datas2.rows[0]['end_date'])

        # C. adding observations at the end of the series
        s4, datas4 = new_update(datetime(2014, 5, 1))
        align_rows(datas4, dates_from=datas3)
        datas4.rows[0]['end_date'] = datas3.rows[0]['end_date'] + 1
        datas4.rows[0]['values'] = datas3.rows[0]['values'] + ['1.0']
        push(s4, datas4)

        self.assertEqual(self.db[constants.COL_SERIES].count(), datas1.max_record)
        test_key = datas4.keys[0]
        test_series = self.db[constants.COL_SERIES].find_one({'key': test_key})
        self.assertEqual(len(test_series['revisions']), 2)
        self.assertEqual(len(test_series['values']), 12)
        self.assertEqual(test_series['values'][11], '1.0')
        self.assertEqual(len(test_series['release_dates']), 12)
        self.assertEqual(test_series['release_dates'][11], datetime(2014, 5, 1))
        self.assertEqual(test_series['start_date'], datas3.rows[0]['start_date'])
        self.assertEqual(test_series['end_date'], datas3.rows[0]['end_date'] + 1)

        # D. removing observations at the beginning and the end of the series
        s5, datas5 = new_update(datetime(2014, 6, 1))
        align_rows(datas5, dates_from=datas4)
        datas5.rows[0]['start_date'] = datas4.rows[0]['start_date'] + 1
        datas5.rows[0]['end_date'] = datas4.rows[0]['end_date'] - 1
        datas5.rows[0]['values'] = datas4.rows[0]['values'][1:-1]
        push(s5, datas5)

        # The assertions below expect the stored series to keep its length
        # and date range, with the two dropped observations replaced by 'na'
        # and recorded as two additional revisions.
        self.assertEqual(self.db[constants.COL_SERIES].count(), datas1.max_record)
        test_key = datas5.keys[0]
        test_series = self.db[constants.COL_SERIES].find_one({'key': test_key})
        self.assertEqual(len(test_series['revisions']), 4)
        self.assertEqual(len(test_series['values']), 12)
        self.assertEqual(test_series['values'][0], 'na')
        self.assertEqual(test_series['values'][1], datas4.rows[0]['values'][1])
        self.assertEqual(test_series['values'][10], datas4.rows[0]['values'][-2])
        self.assertEqual(test_series['values'][11], 'na')
        self.assertEqual(test_series['release_dates'][0], datetime(2014, 6, 1))
        self.assertEqual(test_series['release_dates'][1], datetime(2014, 4, 1))
        self.assertEqual(test_series['release_dates'][10], datetime(2014, 4, 1))
        self.assertEqual(test_series['release_dates'][11], datetime(2014, 6, 1))
        self.assertEqual(test_series['start_date'], datas4.rows[0]['start_date'])
        self.assertEqual(test_series['end_date'], datas4.rows[0]['end_date'])