Beispiel #1
0
def download(overwrite_cached=False):
    """
    Download the data dictionaries and the monthly data files.

    The date range and both destination paths are taken from the
    ``settings.json`` file that sits next to this module.

    overwrite_cached : bool; default False
        When False, an entry is skipped if its renamed form (as given by
        ``dl.rename_cps_monthly``) is already in the matching cache
        returned by ``dl.check_cached``.
    """
    settings = par.read_settings(str(_HERE_ / 'settings.json'))
    have_dd = dl.check_cached(settings['dd_path'], kind='dictionary')
    have_monthly = dl.check_cached(settings['monthly_path'], kind='data')

    # Dictionary files: drop entries whose second element is falsy
    # (e.g. None) before narrowing to the configured range.
    dd_files = filter(lambda pair: pair[1],
                      dl.all_monthly_files(kind='dictionary'))
    dd_files = dl.filter_dds(
        dd_files,
        months=[par._month_to_dd(settings['date_start']),
                par._month_to_dd(settings['date_end'])])

    monthly_files = dl.filter_monthly_files(
        dl.all_monthly_files(),
        months=[[settings['date_start'], settings['date_end']]])

    if not overwrite_cached:
        def missing_from(cache):
            # Build a predicate that is true for entries not yet in `cache`.
            return lambda pair: dl.rename_cps_monthly(pair[1]) not in cache

        dd_files = filter(missing_from(have_dd), dd_files)
        monthly_files = filter(missing_from(have_monthly), monthly_files)

    # Dictionaries are fetched first, then the monthly data files.
    pending = ((dd_files, 'dd_path'), (monthly_files, 'monthly_path'))
    for files, path_key in pending:
        for month, renamed in files:
            dl.download_month(month, Path(settings[path_key]))
            logging.info("Downloaded {}".format(renamed))
Beispiel #2
0
def download(kind, settings, overwrite=False):
    """
    Download files from NBER.

    kind : {'dictionary', 'data'}
        Which family of files to fetch. Any other value raises
        ``KeyError`` when the settings key is looked up.
    settings : dict
        Must provide 'date_start', 'date_end' and the destination path
        for `kind` ('dd_path' for dictionaries, 'monthly_path' for data).
    overwrite : bool; default False
        Whether to re-download files that are already cached.
    """
    s_path = {'dictionary': 'dd_path', 'data': 'monthly_path'}[kind]
    cached = dl.check_cached(settings[s_path], kind=kind)

    files = dl.all_monthly_files(kind=kind)
    if kind == 'dictionary':
        files = filter(itemgetter(1), files)   # make sure not None cpsdec!
        months = [par._month_to_dd(settings['date_start']),
                  par._month_to_dd(settings['date_end'])]
    else:
        months = [[settings['date_start'], settings['date_end']]]

    files = dl.filter_monthly(files, months=months, kind=kind)

    # The destination does not change between files, so build it once.
    target_dir = Path(settings[s_path])

    for month, renamed in files:
        # Test `overwrite` first: when forcing a download the cache
        # lookup is irrelevant and can be skipped.
        if overwrite or dl.rename_cps_monthly(renamed) not in cached:
            dl.download_month(month, target_dir)
            logger.info("Downloaded {}".format(renamed))
        else:
            logger.info("Using cached {}".format(renamed))
Beispiel #3
0
 def test_all_monthly_dd(self):
     # Listing the dictionary files from the trimmed NBER fixture page
     # should give exactly these pairs, in this order.
     fixture = curdir + '/files/trimmed_nber.html'
     actual = list(d.all_monthly_files(fixture, kind='dictionary'))
     self.assertEqual(actual, [
         ('cpsbjan03.ddf', 'cpsm2003-01.ddf'),
         ('jan94_mar94_dd.txt', 'cpsm1994-01.txt'),
         ('apr94_may95_dd.txt', 'cpsm1994-04.txt'),
         ('jun95_aug95_dd.txt', 'cpsm1995-06.txt'),
     ])
 def test_all_monthly_dd(self):
     # The dictionary listing for the trimmed NBER fixture page must
     # match these four pairs exactly, order included.
     html_path = curdir + '/files/trimmed_nber.html'
     wanted = [
         ('cpsbjan03.ddf', 'cpsm2003-01.ddf'),
         ('jan94_mar94_dd.txt', 'cpsm1994-01.txt'),
         ('apr94_may95_dd.txt', 'cpsm1994-04.txt'),
         ('jun95_aug95_dd.txt', 'cpsm1995-06.txt'),
     ]
     found = list(d.all_monthly_files(html_path, kind='dictionary'))
     self.assertEqual(found, wanted)
Beispiel #5
0
 def test_all_monthly_other(self):
     # An unknown `kind` must raise ValueError. The result is consumed
     # with list() inside the callable so that the error surfaces even
     # if the call itself is lazy.
     def consume():
         return list(d.all_monthly_files(kind='foo'))

     self.assertRaises(ValueError, consume)
Beispiel #6
0
 def test_all_monthly_files(self):
     # With no `kind` argument, the trimmed NBER fixture page should
     # yield exactly these two pairs, in this order.
     fixture = curdir + '/files/trimmed_nber.html'
     actual = list(d.all_monthly_files(fixture))
     self.assertEqual(actual, [
         ('cpsb7601.Z', 'cpsm1976-01.Z'),
         ('jan94pub.zip', 'cpsm1994-01.zip'),
     ])
Beispiel #7
0
 def test_all_monthly_other(self):
     # Asking for an unsupported `kind` has to fail with ValueError.
     # list() drains the result inside the callable in case the call
     # is lazy and only raises once iterated.
     def drain():
         return list(d.all_monthly_files(kind='foo'))

     self.assertRaises(ValueError, drain)
Beispiel #8
0
 def test_all_monthly_dd(self):
     # The dictionary listing for the trimmed NBER fixture page should
     # consist of this single pair.
     html_path = curdir + '/files/trimmed_nber.html'
     found = list(d.all_monthly_files(html_path, kind='dictionary'))
     self.assertEqual(found, [('cpsbjan03.ddf', 'cpsm2003-01.ddf')])
Beispiel #9
0
 def test_all_monthly_files(self):
     # The default listing for the trimmed NBER fixture page must match
     # these two pairs exactly, order included.
     html_path = curdir + '/files/trimmed_nber.html'
     wanted = [
         ('cpsb7601.Z', 'cpsm1976-01.Z'),
         ('jan94pub.zip', 'cpsm1994-01.zip'),
     ]
     found = list(d.all_monthly_files(html_path))
     self.assertEqual(found, wanted)