def download(overwrite_cached=False):
    """
    Download the data dictionaries and monthly data files selected by
    the ``settings.json`` that sits next to this module.

    overwrite_cached : bool; default False
        When False, an entry is skipped if ``dl.rename_cps_monthly`` of
        its second element is already in the matching cached listing.
        When True, every selected entry is downloaded.
    """
    config = par.read_settings(str(_HERE_ / 'settings.json'))
    have_dd = dl.check_cached(config['dd_path'], kind='dictionary')
    have_monthly = dl.check_cached(config['monthly_path'], kind='data')

    # Data dictionaries: lazily drop entries whose second element is falsy
    # (original note: "make sure not None cpsdec!").
    dd_files = (pair for pair in dl.all_monthly_files(kind='dictionary')
                if pair[1])
    dd_span = [par._month_to_dd(config['date_start']),
               par._month_to_dd(config['date_end'])]
    dd_files = dl.filter_dds(dd_files, months=dd_span)

    # Monthly data files restricted to the configured date range.
    data_span = [[config['date_start'], config['date_end']]]
    data_files = dl.filter_monthly_files(dl.all_monthly_files(),
                                         months=data_span)

    if not overwrite_cached:
        def drop_cached(pairs, cache):
            # Lazy, so nothing is consumed before the download loop runs.
            return (pair for pair in pairs
                    if dl.rename_cps_monthly(pair[1]) not in cache)

        dd_files = drop_cached(dd_files, have_dd)
        data_files = drop_cached(data_files, have_monthly)

    # Dictionaries are fully processed before any monthly data file.
    for pairs, path_key in ((dd_files, 'dd_path'),
                            (data_files, 'monthly_path')):
        for month, renamed in pairs:
            dl.download_month(month, Path(config[path_key]))
            logging.info("Downloaded {}".format(renamed))
def download(kind, settings, overwrite=False):
    """
    Download files from NBER.

    Parameters
    ----------
    kind : {'dictionary', 'data'}
        Which family of files to fetch. It also selects the settings key
        holding the destination directory: 'dd_path' for 'dictionary',
        'monthly_path' for 'data'.
    settings : dict
        Must provide 'date_start', 'date_end' and the destination path
        key that corresponds to ``kind``.
    overwrite : bool; default False
        Whether to download files even when they are already cached.

    Raises
    ------
    KeyError
        If ``kind`` is neither 'dictionary' nor 'data'.
    """
    s_path = {'dictionary': 'dd_path', 'data': 'monthly_path'}[kind]
    cached = dl.check_cached(settings[s_path], kind=kind)
    files = dl.all_monthly_files(kind=kind)

    if kind == 'dictionary':
        # Drop entries whose second element is falsy
        # (original note: "make sure not None cpsdec!").
        files = filter(itemgetter(1), files)
        months = [par._month_to_dd(settings['date_start']),
                  par._month_to_dd(settings['date_end'])]
    else:
        months = [[settings['date_start'], settings['date_end']]]

    files = dl.filter_monthly(files, months=months, kind=kind)
    destination = Path(settings[s_path])  # same directory for every file

    for month, renamed in files:
        # `overwrite` is tested first so the cache lookup is skipped
        # entirely when a re-download is forced.
        # NOTE(review): `renamed` is passed through rename_cps_monthly
        # again before the lookup, as in the original code; confirm this
        # matches how check_cached names its entries.
        if overwrite or dl.rename_cps_monthly(renamed) not in cached:
            dl.download_month(month, destination)
            logger.info("Downloaded {}".format(renamed))
        else:
            logger.info("Using cached {}".format(renamed))
def test_all_monthly_dd(self):
    # Data-dictionary entries read from the trimmed NBER listing fixture
    # must match the known pairs, in order.
    fixture = curdir + '/files/trimmed_nber.html'
    expected = [
        ('cpsbjan03.ddf', 'cpsm2003-01.ddf'),
        ('jan94_mar94_dd.txt', 'cpsm1994-01.txt'),
        ('apr94_may95_dd.txt', 'cpsm1994-04.txt'),
        ('jun95_aug95_dd.txt', 'cpsm1995-06.txt'),
    ]
    # list() materializes whatever iterable all_monthly_files returns.
    result = list(d.all_monthly_files(fixture, kind='dictionary'))
    self.assertEqual(result, expected)
def test_all_monthly_dd(self):
    # kind='dictionary' against the trimmed fixture page should produce
    # exactly these four pairs, in this order.
    listing = d.all_monthly_files(curdir + '/files/trimmed_nber.html',
                                  kind='dictionary')
    found = list(listing)
    wanted = [('cpsbjan03.ddf', 'cpsm2003-01.ddf'),
              ('jan94_mar94_dd.txt', 'cpsm1994-01.txt'),
              ('apr94_may95_dd.txt', 'cpsm1994-04.txt'),
              ('jun95_aug95_dd.txt', 'cpsm1995-06.txt')]
    self.assertEqual(found, wanted)
def test_all_monthly_other(self):
    # An unrecognized kind must raise ValueError.
    def consume():
        # The result is forced with list() (original note: "need to
        # thunk"); presumably the error only surfaces on iteration.
        return list(d.all_monthly_files(kind='foo'))

    # Both the call and its consumption run inside the assertion.
    self.assertRaises(ValueError, consume)
def test_all_monthly_files(self):
    # With no explicit kind, the trimmed fixture page should produce the
    # two monthly data file pairs below.
    fixture = curdir + '/files/trimmed_nber.html'
    found = list(d.all_monthly_files(fixture))
    wanted = [
        ('cpsb7601.Z', 'cpsm1976-01.Z'),
        ('jan94pub.zip', 'cpsm1994-01.zip'),
    ]
    self.assertEqual(found, wanted)
def test_all_monthly_dd(self):
    # kind='dictionary' against the trimmed fixture page should produce
    # a single pair.
    source = curdir + '/files/trimmed_nber.html'
    pairs = list(d.all_monthly_files(source, kind='dictionary'))
    self.assertEqual(pairs, [('cpsbjan03.ddf', 'cpsm2003-01.ddf')])