def download(overwrite_cached=False):
    """
    Download the data dictionaries and the monthly data files.

    Settings are read from ``_HERE_ / 'settings.json'``; the keys used are
    ``dd_path``, ``monthly_path``, ``date_start`` and ``date_end``.

    Parameters
    ----------
    overwrite_cached : bool, default False
        When False, entries whose renamed form is already listed by
        ``dl.check_cached`` are skipped. When True, every entry in the
        requested date range is downloaded again.
    """
    settings = par.read_settings(str(_HERE_ / 'settings.json'))
    dd_cache = dl.check_cached(settings['dd_path'], kind='dictionary')
    month_cache = dl.check_cached(settings['monthly_path'], kind='data')

    dictionaries = dl.all_monthly_files(kind='dictionary')
    # Keep only entries whose second element is truthy
    # (original note: "make sure not None cpsdec!").
    dictionaries = filter(itemgetter(1), dictionaries)
    dd_bounds = [par._month_to_dd(settings['date_start']),
                 par._month_to_dd(settings['date_end'])]
    dictionaries = dl.filter_dds(dictionaries, months=dd_bounds)

    monthly = dl.all_monthly_files()
    month_bounds = [[settings['date_start'], settings['date_end']]]
    monthly = dl.filter_monthly_files(monthly, months=month_bounds)

    if not overwrite_cached:
        def not_cached(cache):
            # Predicate: True when the renamed file is absent from `cache`.
            return lambda entry: dl.rename_cps_monthly(entry[1]) not in cache

        dictionaries = filter(not_cached(dd_cache), dictionaries)
        monthly = filter(not_cached(month_cache), monthly)

    # Dictionaries are fetched first, then the monthly data files.
    targets = ((dictionaries, 'dd_path'), (monthly, 'monthly_path'))
    for entries, path_key in targets:
        for month, renamed in entries:
            dl.download_month(month, Path(settings[path_key]))
            logging.info("Downloaded {}".format(renamed))
def test_check_cached_clean(self):
    # Empty the fixture directory first; a plain loop is used because the
    # removals are side effects, not values to collect.
    for path in self.tmpfiles:
        os.remove(path)

    expected = []

    # An empty directory yields no cached files for either kind.
    result = d.check_cached(self.tmpdir, kind='dictionary')
    self.assertEqual(result, expected)

    result = d.check_cached(self.tmpdir, kind='data')
    self.assertEqual(result, expected)

    # After the directory itself is removed, the default-kind lookup is
    # expected to return an empty list as well.
    os.rmdir(self.tmpdir)
    result = d.check_cached(self.tmpdir)
    self.assertEqual(result, expected)
def test_check_cached_clean(self):
    # Empty the fixture directory first; a plain loop is used because the
    # removals are side effects, not values to collect.
    for path in self.tmpfiles:
        os.remove(path)

    expected = []

    # An empty directory yields no cached files for either kind.
    result = d.check_cached(self.tmpdir, kind='dictionary')
    self.assertEqual(result, expected)

    result = d.check_cached(self.tmpdir, kind='data')
    self.assertEqual(result, expected)

    # After the directory itself is removed, the default-kind lookup is
    # expected to return an empty list as well.
    os.rmdir(self.tmpdir)
    result = d.check_cached(self.tmpdir)
    self.assertEqual(result, expected)
def download(kind, settings, overwrite=False):
    """
    Download files from NBER.

    Parameters
    ----------
    kind : {'dictionary', 'data'}
        Which family of files to fetch. Selects the settings key holding
        the target directory: 'dd_path' for 'dictionary', 'monthly_path'
        for 'data'.
    settings : dict
        Must provide 'date_start', 'date_end' and the target-directory key
        that corresponds to `kind`.
    overwrite : bool; default False
        Whether to overwrite existing files. When False, files already
        reported by ``dl.check_cached`` are skipped and logged as cached.

    Raises
    ------
    KeyError
        If `kind` is neither 'dictionary' nor 'data'.
    """
    s_path = {'dictionary': 'dd_path', 'data': 'monthly_path'}[kind]
    cached = dl.check_cached(settings[s_path], kind=kind)
    # The destination does not change between files; build it once.
    target_dir = Path(settings[s_path])
    files = dl.all_monthly_files(kind=kind)

    if kind == 'dictionary':
        files = filter(itemgetter(1), files)  # make sure not None cpsdec!
        months = [par._month_to_dd(settings['date_start']),
                  par._month_to_dd(settings['date_end'])]
    else:
        months = [[settings['date_start'], settings['date_end']]]

    files = dl.filter_monthly(files, months=months, kind=kind)

    for month, renamed in files:
        # With overwrite set the cache is irrelevant, so skip the lookup.
        if overwrite or dl.rename_cps_monthly(renamed) not in cached:
            dl.download_month(month, target_dir)
            logger.info("Downloaded {}".format(renamed))
        else:
            logger.info("Using cached {}".format(renamed))
def test_check_cached_other(self):
    # An unrecognised kind must be rejected with ValueError.
    self.assertRaises(ValueError, d.check_cached, self.tmpdir, kind='foo')
def test_check_cached_ddf(self):
    # The 'dictionary' lookup on the fixture directory should report
    # exactly these two file names, in this order.
    wanted = ['c1.ddf', 'c2.asc']
    found = d.check_cached(self.tmpdir, kind='dictionary')
    self.assertEqual(found, wanted)
def test_check_cached_data(self):
    # The 'data' lookup on the fixture directory should report exactly
    # these two file names, in this order.
    wanted = ['c3.zip', 'z4.Z']
    found = d.check_cached(self.tmpdir, kind='data')
    self.assertEqual(found, wanted)
def test_check_cached_other(self):
    # An unrecognised kind must be rejected with ValueError. The call's
    # return value is irrelevant, so nothing is bound to it.
    with self.assertRaises(ValueError):
        d.check_cached(self.tmpdir, kind='foo')
def test_check_cached_ddf(self):
    # The 'dictionary' lookup on the fixture directory should report
    # exactly these two file names.
    result = d.check_cached(self.tmpdir, kind='dictionary')
    expected = ['c1.ddf', 'c2.asc']
    # Compare the two values; without this the test cannot fail.
    self.assertEqual(result, expected)
def test_check_cached_data(self):
    # The 'data' lookup on the fixture directory should report exactly
    # these two file names.
    result = d.check_cached(self.tmpdir, kind='data')
    # NOTE(review): the expected names must mirror the fixture files placed
    # in self.tmpdir (created outside this block) - confirm against setUp.
    expected = ['c3.zip', 'z4.Z']
    # Compare the two values; without this the test cannot fail.
    self.assertEqual(result, expected)
def test_check_cached_data(self):
    # The 'data' lookup on the fixture directory should report exactly
    # these two file names, in this order.
    wanted = ['c3.zip', 'z4.Z']
    found = d.check_cached(self.tmpdir, kind='data')
    self.assertEqual(found, wanted)