def get_series_metadata(series):
    """Return a small two-column table dict describing a FRED series.

    Looks up *series* through the module-level ``fred`` client and pulls
    the title, units, and frequency from the first entry of the
    ``'seriess'`` payload.
    """
    info = fred.series(series)['seriess'][0]
    labels = ['Title:', 'Units:', 'Frequency:']
    values = [info['title'], info['units'], info['frequency']]
    return {'Table Info': labels, '': values}
def about_series(series):
    """Return a two-column dict summarising a FRED series' metadata.

    The 'Series Info:' column holds field labels; the ''-keyed column
    holds the matching values from the first ``'seriess'`` entry.
    """
    meta = fred.series(series)['seriess'][0]
    fields = [
        ('ID:', meta['id']),
        ('Title:', meta['title']),
        ('Obs. Start:', meta['observation_start']),
        ('Obs. End:', meta['observation_end']),
        ('Frequency:', meta['frequency']),
        ('Units:', meta['units']),
        ('Seasonal Adjustment:', meta['seasonal_adjustment']),
        ('Last Updated:', meta['last_updated']),
    ]
    return {
        'Series Info:': [label for label, _ in fields],
        '': [value for _, value in fields],
    }
def downloadDataFromFred_fillMetadata(ticker):
    """Fetch series metadata for *ticker* from FRED and persist it.

    Returns the status string ``'ok'`` on success, or
    ``'ERROR: <message>'`` when the FRED API reports an error.
    Exceptions raised while writing to the database propagate to the
    caller unchanged.
    """
    series_data = fred.series(ticker)
    log.info('\tFinding Metadata for %s', ticker)
    if 'error_message' in series_data:
        log.warning('\tError in Finding Metadata for %s', ticker)
        # BUG FIX: previously referenced the undefined name
        # `historical_data`, raising a NameError instead of logging the
        # API's error message.
        log.warning('\tError: %s', series_data['error_message'])
        status = 'ERROR: ' + series_data['error_message']
    else:
        log.info('\tFound Metadata for %s... Writing to DB', ticker)
        # Read Data
        metadata = series_data['seriess'][0]
        # Write Data to Database; any failure propagates (the previous
        # `try: ... except: raise` was a no-op and has been removed).
        downloadDataFromFred_fillMetadata_parseSeriesData(ticker, metadata)
        # Update Data in CSV
        status = 'ok'
    return status
def seriesFreq(series):
    """Return the lower-cased frequency of a FRED series.

    If the frequency string contains a comma (e.g. a qualifier after the
    base frequency), only the part before the first comma is returned.
    """
    freq = fred.series(series)['seriess'][0]['frequency'].lower()
    return freq.split(',', 1)[0]
def seriesTitle(series):
    """Return the title of a FRED series as reported by the API."""
    metadata = fred.series(series)['seriess'][0]
    return metadata['title']
def test_fred_series_release(self):
    """fred.series(releases=True) should hit the series/release endpoint."""
    fred.key('abc')
    fred.series(releases=True)
    expected_url = 'http://api.stlouisfed.org/fred/series/release'
    expected_params = {'api_key': 'abc', 'file_type': 'json'}
    self.get.assert_called_with(expected_url, params=expected_params)
def refresh(self, bulk=False, skip_to=None, fn=None, no_download=False, ids=None, force=False, **kwargs):
    """
    Reads the associated API and saves data to tables.

    bulk -- when true, import from the downloaded bulk CSV zip archive;
        otherwise refresh each Series individually via the FRED API.
    skip_to -- bulk-index line number to resume from (coerced to int).
    fn -- explicit local bulk filename forwarded to download_bulk_data().
    no_download -- forwarded to download_bulk_data(); presumably reuses an
        already-downloaded archive -- TODO confirm against that method.
    ids -- optional iterable of series ids to restrict the refresh to.
    force -- refresh even series that look up to date, and clear cached
        per-datum date ranges so they are recomputed.

    NOTE(review): Python 2 code (print statements, iteritems, xrange,
    print>>).  The indentation below was reconstructed from a
    whitespace-collapsed source; verify loop/branch boundaries against
    version control before relying on them.
    """
    if skip_to:
        skip_to = int(skip_to)
    # Disable DEBUG for the duration so Django doesn't accumulate a query
    # log over this long-running import; restored in the finally block.
    tmp_debug = settings.DEBUG
    settings.DEBUG = False
    # Legacy manual transaction management (pre-Django-1.6 API).
    django.db.transaction.enter_transaction_management()
    django.db.transaction.managed(True)
    try:
        if bulk:
            local_fn = self.download_bulk_data(fn=fn, no_download=no_download)
            # Per-file progress record so an interrupted import can resume.
            dsfile, _ = DataSourceFile.objects.get_or_create(name=local_fn)
            if dsfile.complete:
                return
            # Process CSV.
            print 'Reading file...'
            sys.stdout.flush()
            source = zipfile.ZipFile(local_fn, 'r')
            if dsfile.total_lines_complete:
                # Resume where the previous run stopped.
                total = dsfile.total_lines
                if not skip_to:
                    skip_to = dsfile.total_lines_complete
            else:
                # First pass: count index lines so progress can be reported.
                total = len(source.open(s.BULK_INDEX_FN, 'r').readlines())
                DataSourceFile.objects.filter(id=dsfile.id).update(
                    complete=False,
                    total_lines=total,
                    total_lines_complete=0,
                    percent=0,
                )
                django.db.transaction.commit()
            line_iter = iter(source.open(s.BULK_INDEX_FN, 'r'))
            # Skip the index preamble, up to and including the header line
            # that starts with 'series '.
            offset = 0
            while 1:
                try:
                    line = line_iter.next()
                    offset += 1
                    #print 'line:',line.strip()
                    if line.lower().startswith('series '):
                        line_iter.next()
                        offset += 1
                        break
                except StopIteration:
                    break
            total -= offset
            i = 0
            just_skipped = False
            data = csv.DictReader(line_iter, delimiter=';')
            for row in data:
                i += 1
                if skip_to and i < skip_to:
                    if not just_skipped:
                        print
                        print '\rSkipping from %s to %s...' % (i, skip_to),
                        sys.stdout.flush()
                        just_skipped = True
                    continue
                elif just_skipped:
                    just_skipped = False
                    print
                DataSourceFile.objects.filter(id=dsfile.id).update(
                    downloaded=True,
                    complete=False,
                    total_lines=total,
                    total_lines_complete=i,
                    percent=i/float(total)*100,
                )
                # Commit progress every 10 rows to bound transaction size.
                if not i % 10:
                    django.db.transaction.commit()
                # Normalize header names to snake_case and strip values.
                row = dict(
                    (
                        (k or '').strip().lower().replace(' ', '_'),
                        (v or '').strip()
                    ) for k,v in row.iteritems()
                )
                if not row.get('file'):
                    continue
                print '\rLoading %s %.02f%% (%i of %i)...' % (row.get('file'), i/float(total)*100, i, total),
                sys.stdout.flush()
                # Series id is the section filename sans path and extension.
                row['id'] = row['file'].split('\\')[-1].split('.')[0]
                section_fn = row['file'] # FRED2_csv_2/data/4/4BIGEURORECP.csv
                del row['file']
                if row['last_updated']:
                    row['last_updated'] = dateutil.parser.parse(row['last_updated'])
                    row['last_updated'] = date(row['last_updated'].year, row['last_updated'].month, row['last_updated'].day)
                #print row
                series, _ = Series.objects.get_or_create(id=row['id'], defaults=row)
                series.last_updated = row['last_updated']
                series_min_date = series.min_date
                series_max_date = series.max_date
                # Existing observation dates, used to avoid duplicate rows.
                prior_series_dates = set(series.data.all().values_list('date', flat=True))
                # Skip series whose stored data already looks recent enough.
                if series.max_date and series.last_updated > (series.max_date - timedelta(days=s.LAST_UPDATE_DAYS)):
                    continue
                elif not section_fn.endswith('.csv'):
                    continue
                section_fn = 'FRED2_csv_2/data/' + section_fn.replace('\\', '/')
                #print 'section_fn:',section_fn
                lines = source.open(section_fn, 'r').readlines()
                #last_data = None
                last_data_date = None
                last_data_value = None
                total2 = len(source.open(section_fn, 'r').readlines())
                i2 = 0
                if s.EXPAND_DATA_TO_DAYS:
                    print
                series_data_pending = []
                # NOTE(review): this inner loop reuses the name `row`,
                # shadowing the index row above -- fragile but intentional-looking.
                for row in csv.DictReader(source.open(section_fn, 'r')):
                    i2 += 1
                    if s.EXPAND_DATA_TO_DAYS:
                        print '\r\tLine %.02f%% (%i of %i)' % (i2/float(total2)*100, i2, total2),
                        sys.stdout.flush()
                    row['date'] = dateutil.parser.parse(row['DATE'])
                    row['date'] = date(row['date'].year, row['date'].month, row['date'].day)
                    # series_min_date = min(series_min_date or row['date'], row['date'])
                    # series_max_date = max(series_max_date or row['date'], row['date'])
                    del row['DATE']
                    try:
                        row['value'] = float(row['VALUE'])
                    except ValueError:
                        # Non-numeric observation (presumably FRED's '.'
                        # missing-value marker -- TODO confirm).
                        print
                        print 'Invalid value: "%s"' % (row['VALUE'],)
                        sys.stdout.flush()
                        continue
                    del row['VALUE']
                    #print row
                    if s.EXPAND_DATA_TO_DAYS and last_data_date:
                        # Forward-fill the previous value across the gap days.
                        intermediate_days = (row['date'] - last_data_date).days
                        #print 'Expanding data to %i intermediate days...' % (intermediate_days,)
                        #sys.stdout.flush()
                        #Data.objects.bulk_create([
                        series_data_pending.extend([
                            Data(series=series,
                                date=last_data_date+timedelta(days=_days),
                                value=last_data_value)
                            for _days in xrange(1, intermediate_days)
                            if (last_data_date+timedelta(days=_days)) not in prior_series_dates
                        ])
                    #data, _ = Data.objects.get_or_create(series=series, date=row['date'], defaults=row)
                    if row['date'] not in prior_series_dates:
                        data = Data(series=series, date=row['date'], value=row['value'])
                        series_data_pending.append(data)
                        #data.save()
                    last_data_date = row['date']
                    last_data_value = row['value']
                if series_data_pending:
                    Data.objects.bulk_create(series_data_pending)
                # print '\r\tLine %.02f%% (%i of %i)' % (100, i2, total2),
                # print
                series.last_refreshed = date.today()
                series.save()
                # Cleanup.
                django.db.transaction.commit()
                Series.objects.update()
                Data.objects.update()
                gc.collect()
            DataSourceFile.objects.filter(id=dsfile.id).update(
                complete=True,
                downloaded=True,
                total_lines=total,
                total_lines_complete=total,
                percent=100,
            )
        else:
            #TODO:use API to download data for each series_id individually
            #e.g. http://api.stlouisfed.org/fred/series/observations?series_id=DEXUSEU&api_key=<api_key>
            #TODO:check for revised values using output_type?
            #http://api.stlouisfed.org/docs/fred/series_observations.html#output_type
            if force:
                if ids:
                    q = Series.objects.all()
                else:
                    q = Series.objects.get_loadable()
            else:
                q = Series.objects.get_stale(days=30)
            if ids:
                q = q.filter(id__in=ids)
            fred.key(s.API_KEY)
            i = 0
            total = q.count()
            print '%i series found.' % (total,)
            for series in q.iterator():
                i += 1
                print '\rImporting %i of %i' % (i, total),
                sys.stdout.flush()
                # Re-fetch a week of overlap, presumably to pick up revised
                # values -- TODO confirm.
                observation_start = None
                if series.max_date:
                    observation_start = series.max_date - timedelta(days=7)
                try:
                    series_info = fred.series(series.id)['seriess'][0]
                except KeyError:
                    print>>sys.stderr, 'Series %s is missing seriess: %s' % (series.id, fred.series(series.id),)
                    continue
                except Exception as e:
                    print>>sys.stderr, 'Error on %s: %s' % (series.id, e,)
                    continue
                #print 'series_info:',series_info
                last_updated = series_info['last_updated'].strip()
                series.last_updated = dateutil.parser.parse(last_updated) if last_updated else None
                series.popularity = series_info['popularity']
                series.save()
                try:
                    series_data = fred.observations(
                        series.id, observation_start=observation_start)
                except ValueError as e:
                    print>>sys.stderr, e
                    continue
                for data in series_data['observations']:
                    #print series, data['date'], data['value']
                    try:
                        value = float(data['value'])
                    except (ValueError, TypeError) as e:
                        print>>sys.stderr, e
                        continue
                    dt = date(*map(int, data['date'].split('-')))
                    data, created = Data.objects.get_or_create(
                        series=series, date=dt, defaults=dict(value=value))
                    if not created:
                        data.value = value
                        data.save()
                # Reload to pick up fields changed by the inserts above.
                series = Series.objects.get(id=series.id)
                if series.last_updated:
                    most_recent_past_date = series.data.filter(date__lte=date.today()).aggregate(Max('date'))['date__max']
                    threshold = series.last_updated - timedelta(days=series.days)
                    # print
                    # print 'most_recent_past_date:',most_recent_past_date
                    # print 'last_updated:',series.last_updated
                    # print 'threshold:',threshold
                    if most_recent_past_date:
                        # Decide whether each datum's date marks the start or
                        # the end of its reporting period.
                        if series.frequency == c.QUARTERLY and most_recent_past_date.day == 1:
                            #TODO: Is this a safe assumption? Might not matter for series without future data.
                            series.date_is_start = True
                        elif most_recent_past_date >= threshold:
                            series.date_is_start = False
                        else:
                            series.date_is_start = True
                series.last_refreshed = date.today()
                series.save()
                if force:
                    series.data.all().update(start_date_inclusive=None, end_date_inclusive=None)
                missing_dates = series.data.filter(Q(start_date_inclusive__isnull=True)|Q(end_date_inclusive__isnull=True))
                print 'Updating %i date ranges.' % (missing_dates.count(),)
                for _ in missing_dates.iterator():
                    _.set_date_range()
                    _.save()
                django.db.transaction.commit()
            print
    finally:
        #print "Committing..."
        # Restore DEBUG and flush any open transaction before leaving
        # managed-transaction mode.
        settings.DEBUG = tmp_debug
        django.db.transaction.commit()
        django.db.transaction.leave_transaction_management()
def test_fred_series_release(self):
    """fred.series(releases=True) should hit the https series/release endpoint."""
    fred.key('abc')
    fred.series(releases=True)
    expected_url = 'https://api.stlouisfed.org/fred/series/release'
    expected_params = {'api_key': 'abc', 'file_type': 'json'}
    self.get.assert_called_with(expected_url, params=expected_params)
def test_fred_series(self):
    """A bare fred.series() call should hit the plain series endpoint."""
    fred.key('abc')
    fred.series()
    expected_url = 'http://api.stlouisfed.org/fred/series'
    expected_params = {'api_key': 'abc'}
    self.get.assert_called_with(expected_url, params=expected_params)
def refresh(self, bulk=False, skip_to=None, fn=None, no_download=False, ids=None, force=False, **kwargs):
    """
    Reads the associated API and saves data to tables.

    bulk -- when true, import from the downloaded bulk CSV zip archive;
        otherwise refresh each Series individually via the FRED API.
    skip_to -- bulk-index line number to resume from (coerced to int).
    fn -- explicit local bulk filename forwarded to download_bulk_data().
    no_download -- forwarded to download_bulk_data(); presumably reuses an
        already-downloaded archive -- TODO confirm against that method.
    ids -- optional iterable of series ids to restrict the refresh to.
    force -- refresh even series that look up to date, and clear cached
        per-datum date ranges so they are recomputed.

    Fixes applied to this (previously incomplete) Python 2-to-3 port:
      * ``line_iter.next()`` -> ``next(line_iter)`` (AttributeError in py3).
      * ``row.iteritems()`` -> ``row.items()`` (AttributeError in py3).
      * bare ``print`` expression statements (no-ops in py3) -> ``print()``.
      * ``ZipFile.open()`` returns a *binary* stream in py3; wrapped in
        ``io.TextIOWrapper`` so ``csv`` and ``str.startswith`` get text.
      * restored ``end=''`` on the carriage-return progress lines so they
        overwrite in place as the original Python 2 trailing commas did.
    """
    import io  # local import; only the bulk path needs TextIOWrapper

    if skip_to:
        skip_to = int(skip_to)
    # Disable DEBUG for the duration so Django doesn't accumulate a query
    # log over this long-running import; restored in the finally block.
    tmp_debug = settings.DEBUG
    settings.DEBUG = False
    # Legacy manual transaction management (pre-Django-1.6 API).
    django.db.transaction.enter_transaction_management()
    django.db.transaction.managed(True)
    try:
        if bulk:
            local_fn = self.download_bulk_data(fn=fn, no_download=no_download)
            # Per-file progress record so an interrupted import can resume.
            dsfile, _ = DataSourceFile.objects.get_or_create(name=local_fn)
            if dsfile.complete:
                return
            # Process CSV.
            print('Reading file...')
            sys.stdout.flush()
            source = zipfile.ZipFile(local_fn, 'r')

            def open_text(name):
                # ZipFile.open() yields bytes lines in Python 3; the csv
                # module and the string comparisons below require text.
                return io.TextIOWrapper(
                    source.open(name, 'r'), encoding='utf-8', newline='')

            if dsfile.total_lines_complete:
                # Resume where the previous run stopped.
                total = dsfile.total_lines
                if not skip_to:
                    skip_to = dsfile.total_lines_complete
            else:
                # First pass: count index lines so progress can be reported.
                total = len(open_text(s.BULK_INDEX_FN).readlines())
                DataSourceFile.objects.filter(id=dsfile.id).update(
                    complete=False,
                    total_lines=total,
                    total_lines_complete=0,
                    percent=0,
                )
                django.db.transaction.commit()
            line_iter = iter(open_text(s.BULK_INDEX_FN))
            # Skip the index preamble, up to and including the header line
            # starting with 'series '.
            offset = 0
            while 1:
                try:
                    line = next(line_iter)
                    offset += 1
                    if line.lower().startswith('series '):
                        next(line_iter)
                        offset += 1
                        break
                except StopIteration:
                    break
            total -= offset
            i = 0
            just_skipped = False
            data = csv.DictReader(line_iter, delimiter=';')
            for row in data:
                i += 1
                if skip_to and i < skip_to:
                    if not just_skipped:
                        print()
                        print('\rSkipping from %s to %s...' % (i, skip_to), end='')
                        sys.stdout.flush()
                        just_skipped = True
                    continue
                elif just_skipped:
                    just_skipped = False
                    print()
                DataSourceFile.objects.filter(id=dsfile.id).update(
                    downloaded=True,
                    complete=False,
                    total_lines=total,
                    total_lines_complete=i,
                    percent=i / float(total) * 100,
                )
                # Commit progress every 10 rows to bound transaction size.
                if not i % 10:
                    django.db.transaction.commit()
                # Normalize header names to snake_case and strip values.
                row = dict(((k or '').strip().lower().replace(' ', '_'),
                            (v or '').strip()) for k, v in row.items())
                if not row.get('file'):
                    continue
                print('\rLoading %s %.02f%% (%i of %i)...'
                      % (row.get('file'), i / float(total) * 100, i, total), end='')
                sys.stdout.flush()
                # Series id is the section filename sans path and extension.
                row['id'] = row['file'].split('\\')[-1].split('.')[0]
                section_fn = row['file']  # FRED2_csv_2/data/4/4BIGEURORECP.csv
                del row['file']
                if row['last_updated']:
                    row['last_updated'] = dateutil.parser.parse(row['last_updated'])
                    row['last_updated'] = date(row['last_updated'].year,
                                               row['last_updated'].month,
                                               row['last_updated'].day)
                series, _ = Series.objects.get_or_create(id=row['id'], defaults=row)
                series.last_updated = row['last_updated']
                series_min_date = series.min_date
                series_max_date = series.max_date
                # Existing observation dates, used to avoid duplicate rows.
                prior_series_dates = set(
                    series.data.all().values_list('date', flat=True))
                # Skip series whose stored data already looks recent enough.
                if series.max_date and series.last_updated > (
                        series.max_date - timedelta(days=s.LAST_UPDATE_DAYS)):
                    continue
                elif not section_fn.endswith('.csv'):
                    continue
                section_fn = 'FRED2_csv_2/data/' + section_fn.replace('\\', '/')
                last_data_date = None
                last_data_value = None
                total2 = len(open_text(section_fn).readlines())
                i2 = 0
                if s.EXPAND_DATA_TO_DAYS:
                    print()
                series_data_pending = []
                for row2 in csv.DictReader(open_text(section_fn)):
                    i2 += 1
                    if s.EXPAND_DATA_TO_DAYS:
                        print('\r\tLine %.02f%% (%i of %i)'
                              % (i2 / float(total2) * 100, i2, total2), end='')
                        sys.stdout.flush()
                    row2['date'] = dateutil.parser.parse(row2['DATE'])
                    row2['date'] = date(row2['date'].year, row2['date'].month,
                                        row2['date'].day)
                    del row2['DATE']
                    try:
                        row2['value'] = float(row2['VALUE'])
                    except ValueError:
                        # Non-numeric observation (presumably FRED's '.'
                        # missing-value marker -- TODO confirm).
                        print()
                        print('Invalid value: "%s"' % (row2['VALUE'], ))
                        sys.stdout.flush()
                        continue
                    del row2['VALUE']
                    if s.EXPAND_DATA_TO_DAYS and last_data_date:
                        # Forward-fill the previous value across the gap days.
                        intermediate_days = (row2['date'] - last_data_date).days
                        series_data_pending.extend([
                            Data(series=series,
                                 date=last_data_date + timedelta(days=_days),
                                 value=last_data_value)
                            for _days in range(1, intermediate_days)
                            if (last_data_date + timedelta(days=_days)
                                ) not in prior_series_dates
                        ])
                    if row2['date'] not in prior_series_dates:
                        series_data_pending.append(
                            Data(series=series, date=row2['date'],
                                 value=row2['value']))
                    last_data_date = row2['date']
                    last_data_value = row2['value']
                if series_data_pending:
                    Data.objects.bulk_create(series_data_pending)
                series.last_refreshed = date.today()
                series.save()
                # Cleanup.
                django.db.transaction.commit()
                Series.objects.update()
                Data.objects.update()
                gc.collect()
            DataSourceFile.objects.filter(id=dsfile.id).update(
                complete=True,
                downloaded=True,
                total_lines=total,
                total_lines_complete=total,
                percent=100,
            )
        else:
            #TODO:use API to download data for each series_id individually
            #e.g. http://api.stlouisfed.org/fred/series/observations?series_id=DEXUSEU&api_key=<api_key>
            #TODO:check for revised values using output_type?
            #http://api.stlouisfed.org/docs/fred/series_observations.html#output_type
            if force:
                if ids:
                    q = Series.objects.all()
                else:
                    q = Series.objects.get_loadable()
            else:
                q = Series.objects.get_stale(days=30)
            if ids:
                q = q.filter(id__in=ids)
            fred.key(s.API_KEY)
            i = 0
            total = q.count()
            print('%i series found.' % (total, ))
            for series in q.iterator():
                i += 1
                sys.stdout.write('\rImporting %i of %i' % (i, total))
                sys.stdout.flush()
                # Re-fetch a week of overlap, presumably to pick up revised
                # values -- TODO confirm.
                observation_start = None
                if series.max_date:
                    observation_start = series.max_date - timedelta(days=7)
                try:
                    series_info = fred.series(series.id)['seriess'][0]
                except KeyError:
                    print('Series %s is missing seriess: %s' % (
                        series.id,
                        fred.series(series.id),
                    ), file=sys.stderr)
                    continue
                except Exception as e:
                    print('Error on %s: %s' % (
                        series.id,
                        e,
                    ), file=sys.stderr)
                    continue
                last_updated = series_info['last_updated'].strip()
                series.last_updated = dateutil.parser.parse(
                    last_updated) if last_updated else None
                series.popularity = series_info['popularity']
                series.save()
                try:
                    series_data = fred.observations(
                        series.id, observation_start=observation_start)
                except ValueError as e:
                    print('Error getting observations: %s' % e, file=sys.stderr)
                    continue
                for data in series_data['observations']:
                    try:
                        value = float(data['value'])
                    except (ValueError, TypeError) as e:
                        print('Error converting to float: %s' % data['value'],
                              file=sys.stderr)
                        continue
                    dt = date(*map(int, data['date'].split('-')))
                    data, created = Data.objects.get_or_create(
                        series=series, date=dt, defaults=dict(value=value))
                    if not created:
                        data.value = value
                        data.save()
                # Reload to pick up fields changed by the inserts above.
                series = Series.objects.get(id=series.id)
                if series.last_updated:
                    most_recent_past_date = series.data.filter(
                        date__lte=date.today()).aggregate(
                            Max('date'))['date__max']
                    threshold = series.last_updated - timedelta(days=series.days)
                    if most_recent_past_date:
                        # Decide whether each datum's date marks the start
                        # or the end of its reporting period.
                        if series.frequency == c.QUARTERLY and most_recent_past_date.day == 1:
                            #TODO: Is this a safe assumption? Might not matter for series without future data.
                            series.date_is_start = True
                        elif most_recent_past_date >= threshold:
                            series.date_is_start = False
                        else:
                            series.date_is_start = True
                series.last_refreshed = date.today()
                series.save()
                if force:
                    series.data.all().update(start_date_inclusive=None,
                                             end_date_inclusive=None)
                missing_dates = series.data.filter(
                    Q(start_date_inclusive__isnull=True) |
                    Q(end_date_inclusive__isnull=True))
                print('Updating %i date ranges.' % (missing_dates.count(), ))
                for _ in missing_dates.iterator():
                    _.set_date_range()
                    _.save()
                django.db.transaction.commit()
            print()
    finally:
        # Restore DEBUG and flush any open transaction before leaving
        # managed-transaction mode.
        settings.DEBUG = tmp_debug
        django.db.transaction.commit()
        django.db.transaction.leave_transaction_management()