def get_fred_series(series):
    def filter(o):
        return {'date': datetime.strptime(o['date'], '%Y-%m-%d').date(),
                series: o['value']}
    return pd.DataFrame(map(filter, fred.observations(series)['observations']),
                        dtype='float64').set_index('date').dropna()
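# A minimal usage sketch for get_fred_series above (added for illustration, not from
# the original source). The API key is a placeholder and 'UNRATE' is just an example
# FRED series id; the helper assumes fred, pandas, and datetime are imported as shown.
import fred
import pandas as pd
from datetime import datetime

fred.key('your-api-key-here')             # placeholder key
unemployment = get_fred_series('UNRATE')  # example series id
print(unemployment.tail())                # numeric values indexed by observation date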
def loadDatesForLiborRates(self, term, numTerm):
    series = self.convertTermNumTermToLiborSeries(term, numTerm)
    values = fred.observations(series)['observations']['observation']
    dates = []
    for value in values:
        dates.append(value['date'])
    return dates
def firstObs(series):
    min_date = datetime.datetime.today()
    for obs in fred.observations(series)['observations']:
        date_ = datetime.datetime.strptime(obs['date'], "%Y-%m-%d")
        if date_ < min_date:
            min_date = date_
    return min_date
def lastObs(series):
    max_date = datetime.datetime.strptime("1919-01-01", "%Y-%m-%d")
    for obs in fred.observations(series)['observations']:
        date_ = datetime.datetime.strptime(obs['date'], "%Y-%m-%d")
        if date_ > max_date:
            max_date = date_
    return max_date
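# Illustrative use of firstObs/lastObs above (a sketch; 'GDP' is just an example
# series id and the key is a placeholder). Each helper makes one API call and scans
# every observation, so calling both fetches the series twice.
import datetime
import fred

fred.key('your-api-key-here')  # placeholder key
start = firstObs('GDP')
end = lastObs('GDP')
print('GDP observations span %s to %s' % (start.date(), end.date()))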
def fred_series_mult_pd(symbol_list, api_key):
    """
    Use fred api to retrieve time series data.

    Args:
        symbol_list: list of strings representing fred series symbols
        api_key: developer API key from https://fred.stlouisfed.org/

    Returns:
        merged pandas dataframe with date ('dt') and numeric value (<symbol>) columns

    Dependencies: pandas, fred, functools.reduce
    """
    # Use API key
    fred.key(api_key)

    # Define inner functions
    def inner_join_pandas_list(pandas_list, join_on):
        return reduce(lambda a, b: pd.merge(a, b, on=join_on), pandas_list)

    # Query each series in symbol_list, append to df_list
    df_list = []
    for symbol in symbol_list:
        # Import series, convert to dataframe, drop query timestamps,
        # rename columns, format dates
        series_df = pd.DataFrame(fred.observations(symbol)['observations']).\
            drop(['realtime_start', 'realtime_end'], axis=1).\
            rename(columns={'date': 'dt', 'value': symbol.lower()})
        series_df['dt'] = [x.date() for x in pd.to_datetime(series_df['dt'])]
        df_list.append(series_df)

    # Merge list of dataframes
    return inner_join_pandas_list(df_list, 'dt')
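# Possible call pattern for fred_series_mult_pd above (assumed, not from the original
# repo). The symbols and the key are placeholders; because the merge is an inner join
# on 'dt', only dates present in every requested series survive.
from functools import reduce  # dependency named in the docstring
import fred
import pandas as pd

df = fred_series_mult_pd(['GDP', 'UNRATE'], api_key='your-api-key-here')
print(df.columns.tolist())  # expected: ['dt', 'gdp', 'unrate']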
def get_fred(id_str):
    """get_fred takes the FRED key for a series as a parameter, e.g. 'PCE96'
    for real consumer spending, and returns a list of dictionaries. There is a
    dictionary for each date. Dictionary keys are date, realtime_start,
    realtime_end, value."""
    fred_key1 = "observations"
    fred_key2 = "observation"
    return fred.observations(id_str)[fred_key1]
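# Illustrative call to get_fred (the key is a placeholder; 'PCE96' is the example
# series mentioned in its docstring). The client key must be set before calling.
import fred

fred.key('your-api-key-here')  # placeholder key
rows = get_fred('PCE96')
print(rows[0]['date'], rows[0]['value'])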
def getSeriesDataTuple(series):
    dates_values = []
    for obs in fred.observations(series)['observations']:
        date_ = datetime.datetime.strptime(obs['date'], "%Y-%m-%d")
        date_ = datetime.datetime.date(date_)
        value = obs['value']
        dates_values.append((date_, value))
    return dates_values
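# Assumed usage of getSeriesDataTuple above (illustration only; the helper does not
# configure the client itself, so fred.key() must be called first).
import datetime
import fred

fred.key('your-api-key-here')           # placeholder key
pairs = getSeriesDataTuple('CPIAUCSL')  # example series id
first_date, first_value = pairs[0]
print(first_date, first_value)          # value is still the raw string from the API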
def get_hour_value(self):
    fred.key(settings.FRED_KEY)
    last_observation = fred.observations(
        settings.FRED_SERIES)['observations'][-1]
    h = last_observation['value']
    try:
        return float(h)
    except:
        return settings.DEFAULT_HOUR_VALUE_IN_USD
def download(self):
    self.dfList = []
    for tag in self.tags:
        self.obsList = []
        obsRaw = fred.observations(tag[0],
                                   observation_start=self.obsStart,
                                   observation_end=self.obsEnd,
                                   units=tag[1])
        for obs in obsRaw['observations']:
            self.obsList.append((obs['date'], obs['value']))
        df = pd.DataFrame(self.obsList, columns=['DATE', tag[0]])
        self.dfList.append(df)
    self.data = self.mergeByDate(self.dfList)
    self.data = self.data.apply(pd.to_numeric, errors='ignore')
def get_series_data(series, years=5):
    today = datetime.today().strftime('%Y-%m-%d')
    five_years_ago = (
        datetime.today() - timedelta(days=years * days_in_a_year)).strftime('%Y-%m-%d')
    api_call_result = fred.observations(series,
                                        observation_start=five_years_ago,
                                        observation_end=today)
    series_data = _unpack_series_data(series, api_call_result)
    return series_data
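# get_series_data relies on two module-level names not shown in this listing:
# days_in_a_year and _unpack_series_data. The definitions below are stand-ins for
# illustration only (not the original implementations), so the date-window logic
# can be exercised end to end.
from datetime import datetime, timedelta
import fred

days_in_a_year = 365  # assumption used only for this sketch

def _unpack_series_data(series, api_call_result):
    # stand-in: return (date, value) pairs straight from the raw API payload
    return [(obs['date'], obs['value']) for obs in api_call_result['observations']]

fred.key('your-api-key-here')                 # placeholder key
print(get_series_data('DGS10', years=2)[:3])  # example series id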
def set_hour_rate(self, h=None):
    if h:
        self.hour = h
        print "Hour value of work (self.hour) was set to %s usd from FRED API. Retrieving currency rates.." % self.hour
        self.set_currenc_rates()
        print "Done."
    else:
        fred.key('0c9a5ec8dd8c63ab8cbec6514a8f5b37')
        last_observation = fred.observations(
            settings.FRED_SERIES)['observations'][-1]
        h = last_observation['value']
        try:
            self.hour = float(h)
            print "Hour value of work (self.hour) was set to %s usd from FRED API." % self.hour
        except:
            self.hour = 25.
            print "Failed to retrieve rates from FRED API. Assuming 1h = 25usd."
def set_hour_rate(self, h=None):
    if h:
        self.hour = h
        print("Hour value of work (self.hour) was set to %s usd from FRED API. Retrieving currency rates.." % self.hour)
        self.set_currenc_rates()
        print("Done.")
    else:
        fred.key('0c9a5ec8dd8c63ab8cbec6514a8f5b37')
        last_observation = fred.observations(
            settings.FRED_SERIES)['observations'][-1]
        h = last_observation['value']
        try:
            self.hour = float(h)
            print("Hour value of work (self.hour) was set to %s usd from FRED API." % self.hour)
        except:
            self.hour = 25.
            print("Failed to retrieve rates from FRED API. Assuming 1h = 25usd.")
def downloadDataFromFred_fillHistory(ticker):
    log.info('\tFinding Historical Data for %s', ticker)
    historical_data = fred.observations(ticker)
    if 'error_message' in historical_data.keys():
        log.warning('\tError in Finding Historical Data for %s', ticker)
        log.warning('\tError: %s', historical_data['error_message'])
        status = 'ERROR: ' + historical_data['error_message']
    else:
        log.info('\tFound %d Historical Data Points for %s... Writing to DB',
                 len(historical_data['observations']), ticker)
        # Write Data to Database
        try:
            # TODO: Check if we should even fill (i.e. if latest date is more than today's date)
            downloadDataFromFred_fillHistory_parseSeriesData(ticker, historical_data)
        except:
            raise
        # Update Data in CSV
        status = 'ok'
    return status
def getObs(l, *args):
    # takes list from searchTitle and gets obs
    if l == []:
        return {}
    if not args:
        sels = raw_input('Enter selection: ')
        sels = sels.split(' ')
        if '0' in sels:
            sels = range(1, len(l))
    else:
        sels = [1]
    obs = {}  # dict{'seriesID': {'date': value}}
    for i in sels:
        try:
            res = fred.observations(l[(int(i))][1])
            tempD = {}
            for item in res['observations']:
                tempD[item['date']] = item['value']
            obs[l[(int(i))][1]] = tempD
        except:
            print 'Invalid data selection: ', i
    return obs
def loadAllLiborRates(self, term, numTerm):
    '''
    loads timeSeries of rates for one instrument
    return dictionary with dates and values
    if value is '.' then previous value is taken
    '''
    series = self.convertTermNumTermToLiborSeries(term, numTerm)
    values = fred.observations(series)['observations']['observation']
    datesRatesVector = {}
    prevValue = 0.0
    for value in values:
        badValue = False
        try:
            float(value['value'])
        except ValueError:
            badValue = True
        # print str(numTerm)+str(term)+' '+str(badValue)+' '+str(value['date'])+' '+str(value['value'])+' '+str(prevValue)
        if badValue:
            datesRatesVector[value['date']] = prevValue
        else:
            datesRatesVector[value['date']] = value['value']
            prevValue = value['value']
    return datesRatesVector
def set_hour_rate(self, h=None):
    if h:
        self.hour = h
        print("Hour value of work (self.hour) was set to %s usd from FRED API. Retrieving currency rates.." % self.hour)
        self.set_currenc_rates()
        print("Done.")
    else:
        if not settings.FRED_KEY:
            print("Set settings FRED_KEY. Get one at https://fred.stlouisfed.org")
        fred.key(settings.FRED_KEY)
        last_observation = fred.observations(settings.FRED_SERIES)['observations'][-1]
        h = last_observation['value']
        try:
            self.hour = float(h)
            print("Hour value of work (self.hour) was set to %s usd from FRED API." % self.hour)
        except:
            self.hour = 28.18
            print("Failed to retrieve rates from FRED API. Assuming 1h = 28.18 usd.")
def source_FRED(credentials, small_sample:bool=False, id_freq_list_path:str="") -> None:
    """
    Source the full FRED dataset and save to files.
    https://fred.stlouisfed.org/
    """
    # Set up directories if they do not exist
    # TODO

    # Create fred connection using api-key
    fred.key(credentials.API_KEY_FED.key)

    if small_sample:
        try:
            if id_freq_list_path == "":
                filename = "dummy_id_freq_list.json"
                with open(cfg.source.path.FRED.meta + filename, "r") as fp:
                    ids_freqs = json.load(fp)
            else:
                try:
                    with open(id_freq_list_path, "r") as fp:
                        ids_freqs = json.load(fp)
                except Exception as e:
                    logger.warning(e)
                    logger.warning(f"Not able to read provided file in path {id_freq_list_path}.")
                    logger.info("Using precomputed list for retrieval from FRED.")
        except Exception as e:
            logger.info(e)
            logger.info("Not able to find predefined list of ids. Crawling FRED instead.")
            # Crawl to get a full list of available time series.
            ids_freqs = {}
            for s in fred.category_series(33936)["seriess"]:
                ids_freqs[s["id"]] = s["frequency_short"]
            filename = "dummy_id_freq_list.json"
            #path = os.path.join(cfg.source.path.FRED.meta, filename)
            with open(cfg.source.path.FRED.meta + filename, "w") as fp:
                json.dump(ids_freqs, fp, sort_keys=True, indent=4, separators=(",", ": "))

        # Download and save all time series, saving each sample as a JSON
        for id in ids_freqs.keys():
            observations = fred.observations(id)
            json_out = {
                "source": "FRED",
                "id": id,
                "frequency": ids_freqs[id],
                "values": [float(obs["value"]) for obs in observations["observations"]]
            }
            filename = f"{id}.json"
            #path = os.path.join(cfg.source.path.FRED.raw, filename)
            with open(cfg.source.path.FRED.raw + filename, "w") as fp:
                json.dump(json_out, fp)

        # Statistics of sourcing
        # Random dummy data for preprocessing
        num_preprocessed = 0
        for i in range(10000):
            if num_preprocessed % 1000 == 0:
                curr_dir = f"dir{num_preprocessed // 1000 :03d}/"
                os.makedirs(cfg.source.path.FRED.raw + curr_dir, exist_ok=True)
            out = {
                "source": "FRED",
                "id": f"{i:04d}",
                "frequency": np.random.choice(["Y", "Q", "M", "W", "D", "H"]),
                "values": list(np.random.rand(100)),
            }
            filename = f"{i:04d}.json"
            with open(cfg.source.path.FRED.raw + curr_dir + filename, "w") as fp:
                json.dump(out, fp)
            num_preprocessed += 1
    else:
        # Crawl to get a full list of available time series.
        # save every n minutes to avoid having to go redo...
        #if not os.path.isfile(os.path.join(cfg.source.path.FRED.meta, "ids_freq_list_test.json")):
            #logger.info("Crawling FRED.")
            #crawl_fred(api_key=credentials.API_KEY_FED.key, nodes_to_visit=[0], sleep_time=cfg.source.api.FRED.sleep, rate_limit=cfg.source.api.FRED.limit)
            #logger.info("Done crawling.")
        #path = os.path.join(cfg.source.path.FRED.meta, "ids_meta.json")
        logger.info(f"Downloading.")
        download_ids(api_key=credentials.API_KEY_FED.key,
                     sleep_time=cfg.source.api.FRED.sleep,
                     rate_limit=cfg.source.api.FRED.limit)
def refresh(self, bulk=False, skip_to=None, fn=None, no_download=False, ids=None, force=False, **kwargs):
    """
    Reads the associated API and saves data to tables.
    """
    if skip_to:
        skip_to = int(skip_to)
    tmp_debug = settings.DEBUG
    settings.DEBUG = False
    django.db.transaction.enter_transaction_management()
    django.db.transaction.managed(True)
    try:
        if bulk:
            local_fn = self.download_bulk_data(fn=fn, no_download=no_download)
            dsfile, _ = DataSourceFile.objects.get_or_create(name=local_fn)
            if dsfile.complete:
                return

            # Process CSV.
            print('Reading file...')
            sys.stdout.flush()
            source = zipfile.ZipFile(local_fn, 'r')
            if dsfile.total_lines_complete:
                total = dsfile.total_lines
                if not skip_to:
                    skip_to = dsfile.total_lines_complete
            else:
                total = len(source.open(s.BULK_INDEX_FN, 'r').readlines())
                DataSourceFile.objects.filter(id=dsfile.id).update(
                    complete=False,
                    total_lines=total,
                    total_lines_complete=0,
                    percent=0,
                )
                django.db.transaction.commit()

            line_iter = iter(source.open(s.BULK_INDEX_FN, 'r'))
            offset = 0
            while 1:
                try:
                    line = line_iter.next()
                    offset += 1
                    if line.lower().startswith('series '):
                        line_iter.next()
                        offset += 1
                        break
                except StopIteration:
                    break
            total -= offset

            i = 0
            just_skipped = False
            data = csv.DictReader(line_iter, delimiter=';')
            for row in data:
                i += 1
                if skip_to and i < skip_to:
                    if not just_skipped:
                        print
                        print('\rSkipping from %s to %s...' % (i, skip_to))
                        sys.stdout.flush()
                        just_skipped = True
                    continue
                elif just_skipped:
                    just_skipped = False
                    print

                DataSourceFile.objects.filter(id=dsfile.id).update(
                    downloaded=True,
                    complete=False,
                    total_lines=total,
                    total_lines_complete=i,
                    percent=i / float(total) * 100,
                )
                if not i % 10:
                    django.db.transaction.commit()

                row = dict(((k or '').strip().lower().replace(' ', '_'),
                            (v or '').strip()) for k, v in row.iteritems())
                if not row.get('file'):
                    continue

                print('\rLoading %s %.02f%% (%i of %i)...'
                      % (row.get('file'), i / float(total) * 100, i, total))
                sys.stdout.flush()

                row['id'] = row['file'].split('\\')[-1].split('.')[0]
                section_fn = row['file']  # FRED2_csv_2/data/4/4BIGEURORECP.csv
                del row['file']
                if row['last_updated']:
                    row['last_updated'] = dateutil.parser.parse(row['last_updated'])
                    row['last_updated'] = date(row['last_updated'].year,
                                               row['last_updated'].month,
                                               row['last_updated'].day)

                series, _ = Series.objects.get_or_create(id=row['id'], defaults=row)
                series.last_updated = row['last_updated']
                series_min_date = series.min_date
                series_max_date = series.max_date
                prior_series_dates = set(series.data.all().values_list('date', flat=True))

                if series.max_date and series.last_updated > (
                        series.max_date - timedelta(days=s.LAST_UPDATE_DAYS)):
                    continue
                elif not section_fn.endswith('.csv'):
                    continue

                section_fn = 'FRED2_csv_2/data/' + section_fn.replace('\\', '/')
                lines = source.open(section_fn, 'r').readlines()
                #last_data = None
                last_data_date = None
                last_data_value = None
                total2 = len(source.open(section_fn, 'r').readlines())
                i2 = 0
                if s.EXPAND_DATA_TO_DAYS:
                    print
                series_data_pending = []
                for row2 in csv.DictReader(source.open(section_fn, 'r')):
                    i2 += 1
                    if s.EXPAND_DATA_TO_DAYS:
                        print('\r\tLine %.02f%% (%i of %i)' % (i2 / float(total2) * 100, i2, total2))
                        sys.stdout.flush()
                    row2['date'] = dateutil.parser.parse(row2['DATE'])
                    row2['date'] = date(row2['date'].year, row2['date'].month, row2['date'].day)
                    # series_min_date = min(series_min_date or row2['date'], row2['date'])
                    # series_max_date = max(series_max_date or row2['date'], row2['date'])
                    del row2['DATE']
                    try:
                        row2['value'] = float(row2['VALUE'])
                    except ValueError:
                        print
                        print('Invalid value: "%s"' % (row2['VALUE'],))
                        sys.stdout.flush()
                        continue
                    del row2['VALUE']
                    if s.EXPAND_DATA_TO_DAYS and last_data_date:
                        intermediate_days = (row2['date'] - last_data_date).days
                        #print 'Expanding data to %i intermediate days...' % (intermediate_days,)
                        #sys.stdout.flush()
                        #Data.objects.bulk_create([
                        series_data_pending.extend([
                            Data(series=series,
                                 date=last_data_date + timedelta(days=_days),
                                 value=last_data_value)
                            for _days in range(1, intermediate_days)
                            if (last_data_date + timedelta(days=_days)) not in prior_series_dates
                        ])
                    #data, _ = Data.objects.get_or_create(series=series, date=row2['date'], defaults=row2)
                    if row2['date'] not in prior_series_dates:
                        data = Data(series=series, date=row2['date'], value=row2['value'])
                        series_data_pending.append(data)
                        #data.save()
                    last_data_date = row2['date']
                    last_data_value = row2['value']

                if series_data_pending:
                    Data.objects.bulk_create(series_data_pending)

                series.last_refreshed = date.today()
                series.save()

                # Cleanup.
                django.db.transaction.commit()
                Series.objects.update()
                Data.objects.update()
                gc.collect()

            DataSourceFile.objects.filter(id=dsfile.id).update(
                complete=True,
                downloaded=True,
                total_lines=total,
                total_lines_complete=total,
                percent=100,
            )
        else:
            #TODO:use API to download data for each series_id individually
            #e.g. http://api.stlouisfed.org/fred/series/observations?series_id=DEXUSEU&api_key=<api_key>
            #TODO:check for revised values using output_type?
            #http://api.stlouisfed.org/docs/fred/series_observations.html#output_type
            if force:
                if ids:
                    q = Series.objects.all()
                else:
                    q = Series.objects.get_loadable()
            else:
                q = Series.objects.get_stale(days=30)
            if ids:
                q = q.filter(id__in=ids)
            fred.key(s.API_KEY)
            i = 0
            total = q.count()
            print('%i series found.' % (total,))
            for series in q.iterator():
                i += 1
                sys.stdout.write('\rImporting %i of %i' % (i, total))
                sys.stdout.flush()
                observation_start = None
                if series.max_date:
                    observation_start = series.max_date - timedelta(days=7)
                try:
                    series_info = fred.series(series.id)['seriess'][0]
                except KeyError:
                    print('Series %s is missing seriess: %s' % (
                        series.id,
                        fred.series(series.id),
                    ), file=sys.stderr)
                    continue
                except Exception as e:
                    print('Error on %s: %s' % (
                        series.id,
                        e,
                    ), file=sys.stderr)
                    continue
                last_updated = series_info['last_updated'].strip()
                series.last_updated = dateutil.parser.parse(last_updated) if last_updated else None
                series.popularity = series_info['popularity']
                series.save()
                try:
                    series_data = fred.observations(
                        series.id, observation_start=observation_start)
                except ValueError as e:
                    print('Error getting observations: %s' % e, file=sys.stderr)
                    continue
                for data in series_data['observations']:
                    try:
                        value = float(data['value'])
                    except (ValueError, TypeError) as e:
                        print('Error converting to float: %s' % data['value'], file=sys.stderr)
                        continue
                    dt = date(*map(int, data['date'].split('-')))
                    data, created = Data.objects.get_or_create(
                        series=series, date=dt, defaults=dict(value=value))
                    if not created:
                        data.value = value
                        data.save()
                series = Series.objects.get(id=series.id)
                if series.last_updated:
                    most_recent_past_date = series.data.filter(
                        date__lte=date.today()).aggregate(Max('date'))['date__max']
                    threshold = series.last_updated - timedelta(days=series.days)
                    if most_recent_past_date:
                        if series.frequency == c.QUARTERLY and most_recent_past_date.day == 1:
                            #TODO: Is this a safe assumption? Might not matter for series without future data.
                            series.date_is_start = True
                        elif most_recent_past_date >= threshold:
                            series.date_is_start = False
                        else:
                            series.date_is_start = True
                series.last_refreshed = date.today()
                series.save()
                if force:
                    series.data.all().update(start_date_inclusive=None, end_date_inclusive=None)
                missing_dates = series.data.filter(
                    Q(start_date_inclusive__isnull=True) | Q(end_date_inclusive__isnull=True))
                print('Updating %i date ranges.' % (missing_dates.count(),))
                for _ in missing_dates.iterator():
                    _.set_date_range()
                    _.save()
                django.db.transaction.commit()
            print
    finally:
        settings.DEBUG = tmp_debug
        django.db.transaction.commit()
        django.db.transaction.leave_transaction_management()
def test_fred_series_observations(self):
    fred.key('ohai')
    fred.observations("AAA")
    expected = 'http://api.stlouisfed.org/fred/series/observations'
    params = {'api_key': 'ohai', 'series_id': 'AAA', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)
def refresh(self, bulk=False, skip_to=None, fn=None, no_download=False, ids=None, force=False, **kwargs):
    """
    Reads the associated API and saves data to tables.
    """
    if skip_to:
        skip_to = int(skip_to)
    tmp_debug = settings.DEBUG
    settings.DEBUG = False
    django.db.transaction.enter_transaction_management()
    django.db.transaction.managed(True)
    try:
        if bulk:
            local_fn = self.download_bulk_data(fn=fn, no_download=no_download)
            dsfile, _ = DataSourceFile.objects.get_or_create(name=local_fn)
            if dsfile.complete:
                return

            # Process CSV.
            print 'Reading file...'
            sys.stdout.flush()
            source = zipfile.ZipFile(local_fn, 'r')
            if dsfile.total_lines_complete:
                total = dsfile.total_lines
                if not skip_to:
                    skip_to = dsfile.total_lines_complete
            else:
                total = len(source.open(s.BULK_INDEX_FN, 'r').readlines())
                DataSourceFile.objects.filter(id=dsfile.id).update(
                    complete=False,
                    total_lines=total,
                    total_lines_complete=0,
                    percent=0,
                )
                django.db.transaction.commit()

            line_iter = iter(source.open(s.BULK_INDEX_FN, 'r'))
            offset = 0
            while 1:
                try:
                    line = line_iter.next()
                    offset += 1
                    #print 'line:',line.strip()
                    if line.lower().startswith('series '):
                        line_iter.next()
                        offset += 1
                        break
                except StopIteration:
                    break
            total -= offset

            i = 0
            just_skipped = False
            data = csv.DictReader(line_iter, delimiter=';')
            for row in data:
                i += 1
                if skip_to and i < skip_to:
                    if not just_skipped:
                        print
                        print '\rSkipping from %s to %s...' % (i, skip_to),
                        sys.stdout.flush()
                        just_skipped = True
                    continue
                elif just_skipped:
                    just_skipped = False
                    print

                DataSourceFile.objects.filter(id=dsfile.id).update(
                    downloaded=True,
                    complete=False,
                    total_lines=total,
                    total_lines_complete=i,
                    percent=i/float(total)*100,
                )
                if not i % 10:
                    django.db.transaction.commit()

                row = dict(
                    ((k or '').strip().lower().replace(' ', '_'), (v or '').strip())
                    for k, v in row.iteritems()
                )
                if not row.get('file'):
                    continue

                print '\rLoading %s %.02f%% (%i of %i)...' % (row.get('file'), i/float(total)*100, i, total),
                sys.stdout.flush()

                row['id'] = row['file'].split('\\')[-1].split('.')[0]
                section_fn = row['file']  # FRED2_csv_2/data/4/4BIGEURORECP.csv
                del row['file']
                if row['last_updated']:
                    row['last_updated'] = dateutil.parser.parse(row['last_updated'])
                    row['last_updated'] = date(row['last_updated'].year, row['last_updated'].month, row['last_updated'].day)
                #print row
                series, _ = Series.objects.get_or_create(id=row['id'], defaults=row)
                series.last_updated = row['last_updated']
                series_min_date = series.min_date
                series_max_date = series.max_date
                prior_series_dates = set(series.data.all().values_list('date', flat=True))

                if series.max_date and series.last_updated > (series.max_date - timedelta(days=s.LAST_UPDATE_DAYS)):
                    continue
                elif not section_fn.endswith('.csv'):
                    continue

                section_fn = 'FRED2_csv_2/data/' + section_fn.replace('\\', '/')
                #print 'section_fn:',section_fn
                lines = source.open(section_fn, 'r').readlines()
                #last_data = None
                last_data_date = None
                last_data_value = None
                total2 = len(source.open(section_fn, 'r').readlines())
                i2 = 0
                if s.EXPAND_DATA_TO_DAYS:
                    print
                series_data_pending = []
                for row in csv.DictReader(source.open(section_fn, 'r')):
                    i2 += 1
                    if s.EXPAND_DATA_TO_DAYS:
                        print '\r\tLine %.02f%% (%i of %i)' % (i2/float(total2)*100, i2, total2),
                        sys.stdout.flush()
                    row['date'] = dateutil.parser.parse(row['DATE'])
                    row['date'] = date(row['date'].year, row['date'].month, row['date'].day)
                    # series_min_date = min(series_min_date or row['date'], row['date'])
                    # series_max_date = max(series_max_date or row['date'], row['date'])
                    del row['DATE']
                    try:
                        row['value'] = float(row['VALUE'])
                    except ValueError:
                        print
                        print 'Invalid value: "%s"' % (row['VALUE'],)
                        sys.stdout.flush()
                        continue
                    del row['VALUE']
                    #print row
                    if s.EXPAND_DATA_TO_DAYS and last_data_date:
                        intermediate_days = (row['date'] - last_data_date).days
                        #print 'Expanding data to %i intermediate days...' % (intermediate_days,)
                        #sys.stdout.flush()
                        #Data.objects.bulk_create([
                        series_data_pending.extend([
                            Data(series=series, date=last_data_date+timedelta(days=_days), value=last_data_value)
                            for _days in xrange(1, intermediate_days)
                            if (last_data_date+timedelta(days=_days)) not in prior_series_dates
                        ])
                    #data, _ = Data.objects.get_or_create(series=series, date=row['date'], defaults=row)
                    if row['date'] not in prior_series_dates:
                        data = Data(series=series, date=row['date'], value=row['value'])
                        series_data_pending.append(data)
                        #data.save()
                    last_data_date = row['date']
                    last_data_value = row['value']

                if series_data_pending:
                    Data.objects.bulk_create(series_data_pending)

                # print '\r\tLine %.02f%% (%i of %i)' % (100, i2, total2),
                # print
                series.last_refreshed = date.today()
                series.save()

                # Cleanup.
                django.db.transaction.commit()
                Series.objects.update()
                Data.objects.update()
                gc.collect()

            DataSourceFile.objects.filter(id=dsfile.id).update(
                complete=True,
                downloaded=True,
                total_lines=total,
                total_lines_complete=total,
                percent=100,
            )
        else:
            #TODO:use API to download data for each series_id individually
            #e.g. http://api.stlouisfed.org/fred/series/observations?series_id=DEXUSEU&api_key=<api_key>
            #TODO:check for revised values using output_type?
            #http://api.stlouisfed.org/docs/fred/series_observations.html#output_type
            if force:
                if ids:
                    q = Series.objects.all()
                else:
                    q = Series.objects.get_loadable()
            else:
                q = Series.objects.get_stale(days=30)
            if ids:
                q = q.filter(id__in=ids)
            fred.key(s.API_KEY)
            i = 0
            total = q.count()
            print '%i series found.' % (total,)
            for series in q.iterator():
                i += 1
                print '\rImporting %i of %i' % (i, total),
                sys.stdout.flush()
                observation_start = None
                if series.max_date:
                    observation_start = series.max_date - timedelta(days=7)
                try:
                    series_info = fred.series(series.id)['seriess'][0]
                except KeyError:
                    print>>sys.stderr, 'Series %s is missing seriess: %s' % (series.id, fred.series(series.id),)
                    continue
                except Exception as e:
                    print>>sys.stderr, 'Error on %s: %s' % (series.id, e,)
                    continue
                #print 'series_info:',series_info
                last_updated = series_info['last_updated'].strip()
                series.last_updated = dateutil.parser.parse(last_updated) if last_updated else None
                series.popularity = series_info['popularity']
                series.save()
                try:
                    series_data = fred.observations(
                        series.id, observation_start=observation_start)
                except ValueError as e:
                    print>>sys.stderr, e
                    continue
                for data in series_data['observations']:
                    #print series, data['date'], data['value']
                    try:
                        value = float(data['value'])
                    except (ValueError, TypeError) as e:
                        print>>sys.stderr, e
                        continue
                    dt = date(*map(int, data['date'].split('-')))
                    data, created = Data.objects.get_or_create(
                        series=series, date=dt, defaults=dict(value=value))
                    if not created:
                        data.value = value
                        data.save()
                series = Series.objects.get(id=series.id)
                if series.last_updated:
                    most_recent_past_date = series.data.filter(date__lte=date.today()).aggregate(Max('date'))['date__max']
                    threshold = series.last_updated - timedelta(days=series.days)
                    # print
                    # print 'most_recent_past_date:',most_recent_past_date
                    # print 'last_updated:',series.last_updated
                    # print 'threshold:',threshold
                    if most_recent_past_date:
                        if series.frequency == c.QUARTERLY and most_recent_past_date.day == 1:
                            #TODO: Is this a safe assumption? Might not matter for series without future data.
                            series.date_is_start = True
                        elif most_recent_past_date >= threshold:
                            series.date_is_start = False
                        else:
                            series.date_is_start = True
                series.last_refreshed = date.today()
                series.save()
                if force:
                    series.data.all().update(start_date_inclusive=None, end_date_inclusive=None)
                missing_dates = series.data.filter(Q(start_date_inclusive__isnull=True)|Q(end_date_inclusive__isnull=True))
                print 'Updating %i date ranges.' % (missing_dates.count(),)
                for _ in missing_dates.iterator():
                    _.set_date_range()
                    _.save()
                django.db.transaction.commit()
            print
    finally:
        #print "Committing..."
        settings.DEBUG = tmp_debug
        django.db.transaction.commit()
        django.db.transaction.leave_transaction_management()
def test_fred_series_observations(self):
    fred.key('ohai')
    fred.observations("AAA")
    expected = 'https://api.stlouisfed.org/fred/series/observations'
    params = {'api_key': 'ohai', 'series_id': 'AAA', 'file_type': 'json'}
    self.get.assert_called_with(expected, params=params)
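# For context on what the tests above exercise, a direct call against the fred client
# (a sketch: the key is a placeholder, and 'AAA' is the same series id the tests use).
import fred

fred.key('your-api-key-here')  # placeholder key
resp = fred.observations('AAA')
print(len(resp['observations']), resp['observations'][-1])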
def download_ids(api_key:str, sleep_time:int=60, rate_limit:int=100) -> None:
    """ Downloading all time series in the provided JSON file. """
    fred.key(api_key)
    #num_requests = 0
    tot_downloaded = 0
    num_files_written = 0
    list_json = []
    request_time_stamps = []
    #curr_dir = f"dir{tot_downloaded // cfg.source.files_per_folder :04d}/"
    dirs = glob.glob(cfg.source.path.FRED.meta + "/*")
    for d in dirs:
        file_names = glob.glob(d + "/*")
        for fname in file_names:
            with open(fname, "r") as fp:
                ids_meta = json.load(fp)
                fp.close()
            for id_meta in ids_meta:
                try:
                    observations = fred.observations(id_meta["id"])
                    request_time_stamps.append(time.time())
                    #num_requests += 1
                    ts = {
                        "id": id_meta["id"],
                        "source": id_meta["source"],
                        "node_id": id_meta["node_id"],
                        "category_name": id_meta["category_name"],
                        "parent_id": id_meta["parent_id"],
                        "frequency": id_meta["frequency"],
                        "observations": [{"date": obs["date"], "value": obs["value"]}
                                         for obs in observations["observations"]]
                    }
                    tot_downloaded += 1
                    list_json.append(ts)
                    if len(list_json) > cfg.source.samples_per_json:
                        filename = f"raw_{num_files_written:>06}.json"
                        if num_files_written % cfg.source.files_per_folder == 0:
                            curr_dir = f"dir{num_files_written // cfg.source.files_per_folder :04d}/"
                            os.makedirs(os.path.join(cfg.source.path.FRED.raw, curr_dir), exist_ok=True)
                        with open(os.path.join(*[cfg.source.path.FRED.raw, curr_dir, filename]), "w") as fp:
                            json.dump(list_json, fp, sort_keys=True, indent=4, separators=(",", ": "))
                            fp.close()
                        with open(os.path.join(cfg.source.path.FRED.meta, "ids_downloaded.txt"), "a") as fp:
                            for j in list_json:
                                fp.write(j["id"])
                                fp.write("\n")
                            fp.close()
                        num_files_written += 1
                        list_json = []
                    if tot_downloaded % 10000 == 0:
                        logger.info(f"Downloaded {tot_downloaded} time series.")
                except Exception as e:
                    logger.info(f"Failed to download id {id_meta['id']} from fname {fname}.")
                    logger.warning(e)
                if len(request_time_stamps) > rate_limit:
                    first = request_time_stamps.pop(0)
                    if time.time() - first < sleep_time:
                        #logger.info(f"Sleeping for {request_time_stamps[0]-first}.")
                        time.sleep(request_time_stamps[0] - first)
        logger.info(f"Written files in directory {d} and currently have {tot_downloaded:>6} time series saved")
    filename = f"raw_{num_files_written:>06}.json"
    if num_files_written % cfg.source.files_per_folder == 0:
        curr_dir = f"dir{num_files_written // cfg.source.files_per_folder :04d}/"
        os.makedirs(os.path.join(cfg.source.path.FRED.raw, curr_dir), exist_ok=True)
    with open(os.path.join(*[cfg.source.path.FRED.raw, curr_dir, filename]), "w") as fp:
        json.dump(list_json, fp, sort_keys=True, indent=4, separators=(",", ": "))
        fp.close()
def analysis_observations_child(observations_id):
    observations = fred.observations(observations_id)["observations"]
    for item_observations in observations:
        item_observations["id"] = observations_id
        insert_sql('observations', item_observations)
def test_fred_series_observations(self):
    fred.key('ohai')
    fred.observations()
    expected = 'http://api.stlouisfed.org/fred/series/observations'
    params = {'api_key': 'ohai'}
    self.get.assert_called_with(expected, params=params)
def fredCategory(symbol):
    return fred.observations(symbol)['observations']
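# Despite its name, fredCategory returns the raw observations for a series. A quick
# illustrative call (placeholder key, example symbol):
import fred

fred.key('your-api-key-here')  # placeholder key
obs = fredCategory('FEDFUNDS')
print(obs[0]['date'], obs[0]['value'])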