Example #1
def get_fred_series(series):
    # Convert one raw observation dict into a {'date': ..., series: value} record.
    def to_record(o):
        return {'date': datetime.strptime(o['date'], '%Y-%m-%d').date(),
                series: o['value']}

    return pd.DataFrame(map(to_record, fred.observations(series)['observations']),
                        dtype='float64').set_index('date').dropna()
Example #3
    def loadDatesForLiborRates(self, term, numTerm):
        series = self.convertTermNumTermToLiborSeries(term, numTerm)
        values = fred.observations(series)['observations']['observation']
        dates = []
        for value in values:
            dates.append(value['date'])
        return dates
Example #4
def firstObs(series):
    min_date = datetime.datetime.today()
    for obs in fred.observations(series)['observations']:
        date_ = datetime.datetime.strptime(obs['date'], "%Y-%m-%d")
        if date_ < min_date:
            min_date = date_
    return min_date
Example #5
def lastObs(series):
    max_date = datetime.datetime.strptime("1919-01-01", "%Y-%m-%d")
    for obs in fred.observations(series)['observations']:
        date_ = datetime.datetime.strptime(obs['date'], "%Y-%m-%d")
        if date_ > max_date:
            max_date = date_
    return max_date
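Together, the two helpers bound a series' sample period. A short sketch, assuming fred.key(...) has already been called; the series id is illustrative:

# Illustrative series id.
start = firstObs('UNRATE')
end = lastObs('UNRATE')
print('UNRATE spans %s to %s' % (start.date(), end.date()))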
Example #6
def fred_series_mult_pd(symbol_list, api_key):
    """
    Use fred api to retrieve time series data.
    Args:
        symbol_list: list of strings representing fred series symbols
        api_key: developer API key from https://fred.stlouisfed.org/
    Returns:
        merged pandas dataframe with date ('dt') and numeric value (<symbol>) columns
    Dependencies:
        pandas, fred, functools.reduce
    """
    # Use API key
    fred.key(api_key)

    # Define inner functions
    def inner_join_pandas_list(pandas_list, join_on):
        return reduce(lambda a, b: pd.merge(a, b, on=join_on), pandas_list)

    # Query each series in symbol_list, append to df_list
    df_list = []
    for symbol in symbol_list:
        # Import series, convert to dataframe, drop query timestamps, rename columns, format dates
        series_df = pd.DataFrame(fred.observations(symbol)['observations']).\
                                 drop(['realtime_start','realtime_end'], axis = 1).\
                                 rename(columns = {'date' : 'dt', 'value' : symbol.lower()})
        series_df['dt'] = [x.date() for x in pd.to_datetime(series_df['dt'])]
        df_list.append(series_df)

    # Merge list of dataframes
    return inner_join_pandas_list(df_list, 'dt')
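A hedged usage sketch; the symbols and key are placeholders. Note that the inner join keeps only dates present in every requested series:

# Hypothetical call -- symbols and key are placeholders.
df = fred_series_mult_pd(['GDP', 'UNRATE'], api_key='your-api-key')
print(df.head())  # columns: dt, gdp, unrate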
Example #7
def get_fred(id_str):
    """get_fred takes the FRED key for a series as a parameter, e.g. 'PCE96' for real
    consumer spending, and returns a list of dictionaries, one per date. Dictionary
    keys are date, realtime_start, realtime_end, and value."""

    return fred.observations(id_str)['observations']
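The shape of the returned records, sketched from the docstring (field values elided, not real data):

obs = get_fred('PCE96')
# Each element is shaped like:
# {'date': 'YYYY-MM-DD', 'realtime_start': 'YYYY-MM-DD',
#  'realtime_end': 'YYYY-MM-DD', 'value': '<string>'}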
Example #8
def getSeriesDataTuple(series):
    dates_values = []
    for obs in fred.observations(series)['observations']:
        date_ = datetime.datetime.strptime(obs['date'], "%Y-%m-%d").date()
        value = obs['value']
        dates_values.append((date_, value))
    return dates_values
Example #9
    def get_hour_value(self):
        fred.key(settings.FRED_KEY)
        last_observation = fred.observations(
            settings.FRED_SERIES)['observations'][-1]
        h = last_observation['value']
        try:
            return float(h)
        except (TypeError, ValueError):
            return settings.DEFAULT_HOUR_VALUE_IN_USD
Example #11
    def download(self):
        self.dfList = []
        for tag in self.tags:
            self.obsList = []
            obsRaw = fred.observations(tag[0], observation_start=self.obsStart, observation_end=self.obsEnd, units=tag[1])
            for obs in obsRaw['observations']:
                self.obsList.append((obs['date'], obs['value']))
            df = pd.DataFrame(self.obsList, columns=['DATE', tag[0]])
            self.dfList.append(df)
        self.data = self.mergeByDate(self.dfList)
        self.data = self.data.apply(pd.to_numeric, errors='ignore')
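self.mergeByDate is not shown; a minimal stand-in under the assumption that it outer-joins the frames on their DATE column:

from functools import reduce
import pandas as pd

# Hypothetical stand-in for the mergeByDate helper used above.
def merge_by_date(df_list):
    return reduce(lambda a, b: pd.merge(a, b, on='DATE', how='outer'), df_list)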
Example #12
def get_series_data(series, years=5):
    today = datetime.today().strftime('%Y-%m-%d')
    start_date = (
        datetime.today() -
        timedelta(days=years * days_in_a_year)).strftime('%Y-%m-%d')

    api_call_result = fred.observations(series,
                                        observation_start=start_date,
                                        observation_end=today)

    series_data = _unpack_series_data(series, api_call_result)

    return series_data
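_unpack_series_data is not shown; a sketch of what it might do, purely an assumption based on how the observations payload is handled in the other examples:

# Hypothetical sketch -- the real helper is not shown in this example.
def _unpack_series_data(series, api_call_result):
    return [(obs['date'], obs['value'])
            for obs in api_call_result['observations']]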
Example #13
    def set_hour_rate(self, h=None):
        if h:
            self.hour = h
            print "Hour value of work (self.hour) was set to %s usd from FRED API. Retrieving currency rates.." % self.hour
            self.set_currenc_rates()
            print "Done."
        else:
            fred.key('0c9a5ec8dd8c63ab8cbec6514a8f5b37')
            last_observation = fred.observations(
                settings.FRED_SERIES)['observations'][-1]
            h = last_observation['value']
            try:
                self.hour = float(h)
                print "Hour value of work (self.hour) was set to %s usd from FRED API." % self.hour
            except (TypeError, ValueError):
                self.hour = 25.
                print "Failed to retrieve rates from FRED API. Assuming 1h = 25usd."
Example #14
    def set_hour_rate(self, h=None):
        if h:
            self.hour = h
            print("Hour value of work (self.hour) was set to %s usd from FRED API. Retrieving currency rates.." % self.hour)
            self.set_currenc_rates()
            print("Done.")
        else:
            fred.key('0c9a5ec8dd8c63ab8cbec6514a8f5b37')
            last_observation = fred.observations(
                settings.FRED_SERIES)['observations'][-1]
            h = last_observation['value']
            try:
                self.hour = float(h)
                print("Hour value of work (self.hour) was set to %s usd from FRED API." % self.hour)
            except (TypeError, ValueError):
                self.hour = 25.
                print("Failed to retrieve rates from FRED API. Assuming 1h = 25usd.")
Example #15
def downloadDataFromFred_fillHistory(ticker):
	log.info('\tFinding Historical Data for %s', ticker)
	historical_data = fred.observations(ticker)
	if 'error_message' in historical_data.keys():
		log.warning('\tError in Finding Historical Data for %s', ticker)
		log.warning('\tError: %s', historical_data['error_message'])
		status = 'ERROR: ' + historical_data['error_message']
	else:
		log.info('\tFound %d Historical Data Points for %s... Writing to DB', len(historical_data['observations']), ticker)
		# Write Data to Database
		try:
			# TODO: Check if we should even fill (i.e. if latest date is more than today's date)
			downloadDataFromFred_fillHistory_parseSeriesData(ticker, historical_data)
		except:
			raise
		# Update Data in CSV
		status = 'ok'
	return status
Example #16
def getObs(l, *args):  #takes list from searchTitle and gets obs

    if l == []: return {}
    if not args:
        sels = raw_input('Enter selection: ')
        sels = sels.split(' ')
        if '0' in sels: sels = range(1, len(l))
    else: sels = [1]

    obs = {}  #dict{'seriesID': {'date':value}}
    for i in sels:
        try:
            res = fred.observations(l[(int(i))][1])
            tempD = {}
            for item in res['observations']:
                tempD[item['date']] = item['value']
            obs[l[(int(i))][1]] = tempD
        except Exception:
            print 'Invalid data selection: ', i

    return obs
Example #17
    def loadAllLiborRates(self, term, numTerm):
        '''
        Loads the time series of rates for one instrument and returns a
        dictionary mapping dates to values. If a value is '.', the previous
        value is carried forward.
        '''
        series = self.convertTermNumTermToLiborSeries(term, numTerm)
        values = fred.observations(series)['observations']['observation']
        datesRatesVector = {}
        prevValue = 0.0
        for value in values:
            badValue = False
            try:
                float(value['value'])
            except ValueError:
                badValue = True
#            print str(numTerm)+str(term)+' '+str(badValue)+' '+str(value['date'])+' '+str(value['value']) + ' ' +str(prevValue)
            if badValue:
                datesRatesVector[value['date']] = prevValue
            else:
                datesRatesVector[value['date']] = value['value']
                prevValue = value['value']
        return datesRatesVector
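The same carry-forward of '.' placeholders can be written with pandas. A sketch, assuming the flat observations list used in most of these examples; unlike the loop above, leading missing values stay NaN instead of 0.0:

import pandas as pd

def forward_fill_rates(observations):
    # '.' marks a missing value in FRED output; coerce to NaN, then fill forward.
    df = pd.DataFrame(observations)[['date', 'value']]
    df['value'] = pd.to_numeric(df['value'], errors='coerce').ffill()
    return dict(zip(df['date'], df['value']))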
Example #18
    def set_hour_rate(self, h=None):
        if h:
            self.hour = h
            print("Hour value of work (self.hour) was set to %s usd from FRED API. Retrieving currency rates.." % self.hour)
            self.set_currenc_rates()
            print("Done.")

        else:
            if not settings.FRED_KEY:
                print("Set settings FRED_KEY. Get one at https://fred.stlouisfed.org")

            fred.key(settings.FRED_KEY)

            last_observation = fred.observations(settings.FRED_SERIES)['observations'][-1]

            h = last_observation['value']

            try:
                self.hour = float(h)
                print("Hour value of work (self.hour) was set to %s usd from FRED API." % self.hour)
            except (TypeError, ValueError):
                self.hour = 28.18
                print("Failed to retrieve rates from FRED API. Assuming 1h = 28.18 usd.")
Example #19
def source_FRED(credentials, small_sample:bool=False, id_freq_list_path:str="") -> None:
    """
    Source the full FRED dataset and save to files. https://fred.stlouisfed.org/
    """
    # Set up directories if they do not exist
    # TODO

    # Create fred connection using api-key
    fred.key(credentials.API_KEY_FED.key)

    if small_sample:
        try:
            if id_freq_list_path == "":
                filename="dummy_id_freq_list.json"
                with open(cfg.source.path.FRED.meta + filename, "r") as fp:
                    ids_freqs = json.load(fp)
            else:
                try:
                    with open(id_freq_list_path, "r") as fp:
                        ids_freqs = json.load(fp)
                except Exception as e:
                    logger.warning(e)
                    logger.warning(f"Not able to read provided file in path {id_freq_list_path}.")
                    raise
            logger.info("Using precomputed list for retrieval from FRED.")
        except Exception as e:
            logger.info(e)
            logger.info("Not able to find predefined list of ids. Crawling FRED instead.") 
            # Crawl to get a full list of available time series.
            ids_freqs = {}
            for s in fred.category_series(33936)["seriess"]:
                ids_freqs[s["id"]] = s["frequency_short"]
            
            filename="dummy_id_freq_list.json"
            #path = os.path.join(cfg.source.path.FRED.meta, filename)
            with open(cfg.source.path.FRED.meta + filename, "w") as fp:
                json.dump(ids_freqs, fp, sort_keys=True, indent=4, separators=(",", ": "))
        
        # Download and save all time series, saving each sample as JSON
        for id in ids_freqs.keys():
            observations = fred.observations(id)
            json_out = {
                "source" : "FRED",
                "id" : id,
                "frequency" : ids_freqs[id],
                "values" : [float(obs["value"]) for obs in observations["observations"]]
            }
            filename=f"{id}.json"
            #path = os.path.join(cfg.source.path.FRED.raw, filename)
            with open(cfg.source.path.FRED.raw + filename, "w") as fp:
                json.dump(json_out, fp)
            
        # Statistics of sourcing

        # Random dummy data for preprocessing
        num_preprocessed = 0
        for i in range(10000):
            if num_preprocessed % 1000 == 0:
                curr_dir = f"dir{num_preprocessed // 1000 :03d}/"
                os.makedirs(cfg.source.path.FRED.raw + curr_dir, exist_ok=True)
            out = {
                "source" : "FRED",
                "id" : f"{i:04d}",
                "frequency" : np.random.choice(["Y", "Q", "M", "W", "D", "H"]),
                "values" : list(np.random.rand(100)),
            }
            filename = f"{i:04d}.json"
            with open(cfg.source.path.FRED.raw + curr_dir + filename, "w") as fp:
                json.dump(out, fp)
            num_preprocessed += 1

    else:
        # Crawl to get a full list of available time series.
        # save every n minutes to avoid having to go redo...
        #if not os.path.isfile(os.path.join(cfg.source.path.FRED.meta, "ids_freq_list_test.json")):
        #logger.info("Crawling FRED.")
        #crawl_fred(api_key=credentials.API_KEY_FED.key, nodes_to_visit=[0], sleep_time=cfg.source.api.FRED.sleep, rate_limit=cfg.source.api.FRED.limit)
        #logger.info("Done crawling.")
        #path = os.path.join(cfg.source.path.FRED.meta, "ids_meta.json")

        logger.info(f"Downloading.")
        download_ids(api_key=credentials.API_KEY_FED.key, sleep_time=cfg.source.api.FRED.sleep, rate_limit=cfg.source.api.FRED.limit)
Example #20
    def refresh(self,
                bulk=False,
                skip_to=None,
                fn=None,
                no_download=False,
                ids=None,
                force=False,
                **kwargs):
        """
        Reads the associated API and saves data to tables.
        """

        if skip_to:
            skip_to = int(skip_to)

        tmp_debug = settings.DEBUG
        settings.DEBUG = False
        django.db.transaction.enter_transaction_management()
        django.db.transaction.managed(True)

        try:
            if bulk:
                local_fn = self.download_bulk_data(fn=fn,
                                                   no_download=no_download)
                dsfile, _ = DataSourceFile.objects.get_or_create(name=local_fn)
                if dsfile.complete:
                    return

                # Process CSV.
                print('Reading file...')
                sys.stdout.flush()
                source = zipfile.ZipFile(local_fn, 'r')
                if dsfile.total_lines_complete:
                    total = dsfile.total_lines
                    if not skip_to:
                        skip_to = dsfile.total_lines_complete
                else:
                    total = len(source.open(s.BULK_INDEX_FN, 'r').readlines())
                    DataSourceFile.objects.filter(id=dsfile.id).update(
                        complete=False,
                        total_lines=total,
                        total_lines_complete=0,
                        percent=0,
                    )
                django.db.transaction.commit()
                line_iter = iter(source.open(s.BULK_INDEX_FN, 'r'))
                offset = 0
                while 1:
                    try:
                        line = next(line_iter)
                        offset += 1
                        if line.lower().startswith('series '):
                            next(line_iter)
                            offset += 1
                            break
                    except StopIteration:
                        break
                total -= offset
                i = 0
                just_skipped = False
                data = csv.DictReader(line_iter, delimiter=';')
                for row in data:
                    i += 1
                    if skip_to and i < skip_to:
                        if not just_skipped:
                            print()
                        print('\rSkipping from %s to %s...' % (i, skip_to), end='')
                        sys.stdout.flush()
                        just_skipped = True
                        continue
                    elif just_skipped:
                        just_skipped = False
                        print()

                    DataSourceFile.objects.filter(id=dsfile.id).update(
                        downloaded=True,
                        complete=False,
                        total_lines=total,
                        total_lines_complete=i,
                        percent=i / float(total) * 100,
                    )
                    if not i % 10:
                        django.db.transaction.commit()

                    row = dict(((k or '').strip().lower().replace(' ', '_'),
                                (v or '').strip()) for k, v in row.items())
                    if not row.get('file'):
                        continue
                    print('\rLoading %s %.02f%% (%i of %i)...' %
                          (row.get('file'), i / float(total) * 100, i, total),
                          end='')
                    sys.stdout.flush()
                    row['id'] = row['file'].split('\\')[-1].split('.')[0]
                    section_fn = row[
                        'file']  # FRED2_csv_2/data/4/4BIGEURORECP.csv
                    del row['file']
                    if row['last_updated']:
                        row['last_updated'] = dateutil.parser.parse(
                            row['last_updated'])
                        row['last_updated'] = date(row['last_updated'].year,
                                                   row['last_updated'].month,
                                                   row['last_updated'].day)
                    series, _ = Series.objects.get_or_create(id=row['id'],
                                                             defaults=row)
                    series.last_updated = row['last_updated']
                    series_min_date = series.min_date
                    series_max_date = series.max_date
                    prior_series_dates = set(series.data.all().values_list(
                        'date', flat=True))

                    if series.max_date and series.last_updated > (
                            series.max_date -
                            timedelta(days=s.LAST_UPDATE_DAYS)):
                        continue
                    elif not section_fn.endswith('.csv'):
                        continue

                    section_fn = 'FRED2_csv_2/data/' + section_fn.replace(
                        '\\', '/')
                    lines = source.open(section_fn, 'r').readlines()
                    #last_data = None
                    last_data_date = None
                    last_data_value = None
                    total2 = len(source.open(section_fn, 'r').readlines())
                    i2 = 0
                    if s.EXPAND_DATA_TO_DAYS:
                        print()
                    series_data_pending = []
                    for row2 in csv.DictReader(source.open(section_fn, 'r')):
                        i2 += 1
                        if s.EXPAND_DATA_TO_DAYS:
                            print('\r\tLine %.02f%% (%i of %i)' %
                                  (i2 / float(total2) * 100, i2, total2), end='')
                        sys.stdout.flush()
                        row2['date'] = dateutil.parser.parse(row2['DATE'])
                        row2['date'] = date(row2['date'].year,
                                            row2['date'].month,
                                            row2['date'].day)

                        #                        series_min_date = min(series_min_date or row2['date'], row2['date'])
                        #                        series_max_date = max(series_max_date or row2['date'], row2['date'])

                        del row2['DATE']
                        try:
                            row2['value'] = float(row2['VALUE'])
                        except ValueError:
                            print()
                            print('Invalid value: "%s"' % (row2['VALUE'], ))
                            sys.stdout.flush()
                            continue
                        del row2['VALUE']

                        if s.EXPAND_DATA_TO_DAYS and last_data_date:
                            intermediate_days = (row2['date'] -
                                                 last_data_date).days
                            #print 'Expanding data to %i intermediate days...' % (intermediate_days,)
                            #sys.stdout.flush()
                            #Data.objects.bulk_create([
                            series_data_pending.extend([
                                Data(series=series,
                                     date=last_data_date +
                                     timedelta(days=_days),
                                     value=last_data_value)
                                for _days in range(1, intermediate_days)
                                if (last_data_date + timedelta(days=_days)
                                    ) not in prior_series_dates
                            ])

                        #data, _ = Data.objects.get_or_create(series=series, date=row2['date'], defaults=row2)
                        if row2['date'] not in prior_series_dates:
                            data = Data(series=series,
                                        date=row2['date'],
                                        value=row2['value'])
                            series_data_pending.append(data)
                        #data.save()
                        last_data_date = row2['date']
                        last_data_value = row2['value']
                    if series_data_pending:
                        Data.objects.bulk_create(series_data_pending)

                    series.last_refreshed = date.today()
                    series.save()

                    # Cleanup.
                    django.db.transaction.commit()
                    Series.objects.update()
                    Data.objects.update()
                    gc.collect()

                DataSourceFile.objects.filter(id=dsfile.id).update(
                    complete=True,
                    downloaded=True,
                    total_lines=total,
                    total_lines_complete=total,
                    percent=100,
                )

            else:
                #TODO:use API to download data for each series_id individually
                #e.g. http://api.stlouisfed.org/fred/series/observations?series_id=DEXUSEU&api_key=<api_key>
                #TODO:check for revised values using output_type?
                #http://api.stlouisfed.org/docs/fred/series_observations.html#output_type
                if force:
                    if ids:
                        q = Series.objects.all()
                    else:
                        q = Series.objects.get_loadable()
                else:
                    q = Series.objects.get_stale(days=30)

                if ids:
                    q = q.filter(id__in=ids)
                fred.key(s.API_KEY)
                i = 0
                total = q.count()
                print('%i series found.' % (total, ))
                for series in q.iterator():
                    i += 1
                    sys.stdout.write('\rImporting %i of %i' % (i, total))
                    sys.stdout.flush()
                    observation_start = None
                    if series.max_date:
                        observation_start = series.max_date - timedelta(days=7)

                    try:
                        series_info = fred.series(series.id)['seriess'][0]
                    except KeyError:
                        print('Series %s is missing seriess: %s' % (
                            series.id,
                            fred.series(series.id),
                        ),
                              file=sys.stderr)
                        continue
                    except Exception as e:
                        print('Error on %s: %s' % (
                            series.id,
                            e,
                        ),
                              file=sys.stderr)
                        continue

                    last_updated = series_info['last_updated'].strip()
                    series.last_updated = dateutil.parser.parse(
                        last_updated) if last_updated else None
                    series.popularity = series_info['popularity']
                    series.save()

                    try:
                        series_data = fred.observations(
                            series.id, observation_start=observation_start)
                    except ValueError as e:
                        print('Error getting observations: %s' % e,
                              file=sys.stderr)
                        continue

                    for data in series_data['observations']:

                        try:
                            value = float(data['value'])
                        except (ValueError, TypeError) as e:
                            print('Error converting to float: %s' %
                                  data['value'],
                                  file=sys.stderr)
                            continue

                        dt = date(*map(int, data['date'].split('-')))
                        data, created = Data.objects.get_or_create(
                            series=series, date=dt, defaults=dict(value=value))
                        if not created:
                            data.value = value
                            data.save()

                    series = Series.objects.get(id=series.id)
                    if series.last_updated:
                        most_recent_past_date = series.data.filter(
                            date__lte=date.today()).aggregate(
                                Max('date'))['date__max']
                        threshold = series.last_updated - timedelta(
                            days=series.days)
                        if most_recent_past_date:
                            if series.frequency == c.QUARTERLY and most_recent_past_date.day == 1:
                                #TODO: Is this a safe assumption? Might not matter for series without future data.
                                series.date_is_start = True
                            elif most_recent_past_date >= threshold:
                                series.date_is_start = False
                            else:
                                series.date_is_start = True
                    series.last_refreshed = date.today()
                    series.save()

                    if force:
                        series.data.all().update(start_date_inclusive=None,
                                                 end_date_inclusive=None)

                    missing_dates = series.data.filter(
                        Q(start_date_inclusive__isnull=True)
                        | Q(end_date_inclusive__isnull=True))
                    print('Updating %i date ranges.' %
                          (missing_dates.count(), ))
                    for _ in missing_dates.iterator():
                        _.set_date_range()
                        _.save()

                    django.db.transaction.commit()
                print()
        finally:
            settings.DEBUG = tmp_debug
            django.db.transaction.commit()
            django.db.transaction.leave_transaction_management()
Example #21
    def test_fred_series_observations(self):
        fred.key('ohai')
        fred.observations("AAA")
        expected = 'http://api.stlouisfed.org/fred/series/observations'
        params = {'api_key': 'ohai', 'series_id': 'AAA', 'file_type': 'json'}
        self.get.assert_called_with(expected, params=params)
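The test pins down the underlying HTTP request; the same call issued directly with requests, as a sketch (the key is a placeholder):

import requests

resp = requests.get('http://api.stlouisfed.org/fred/series/observations',
                    params={'api_key': 'your-api-key',
                            'series_id': 'AAA',
                            'file_type': 'json'})
observations = resp.json()['observations']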
Example #22
    def refresh(self, bulk=False, skip_to=None, fn=None, no_download=False, ids=None, force=False, **kwargs):
        """
        Reads the associated API and saves data to tables.
        """
        
        if skip_to:
            skip_to = int(skip_to)
        
        tmp_debug = settings.DEBUG
        settings.DEBUG = False
        django.db.transaction.enter_transaction_management()
        django.db.transaction.managed(True)
        
        try:
            if bulk:
                local_fn = self.download_bulk_data(fn=fn, no_download=no_download)
                dsfile, _ = DataSourceFile.objects.get_or_create(name=local_fn)
                if dsfile.complete:
                    return
                
                # Process CSV.
                print 'Reading file...'
                sys.stdout.flush()
                source = zipfile.ZipFile(local_fn, 'r')
                if dsfile.total_lines_complete:
                    total = dsfile.total_lines
                    if not skip_to:
                        skip_to = dsfile.total_lines_complete
                else:
                    total = len(source.open(s.BULK_INDEX_FN, 'r').readlines())
                    DataSourceFile.objects.filter(id=dsfile.id).update(
                        complete=False,
                        total_lines=total,
                        total_lines_complete=0,
                        percent=0,
                    )
                django.db.transaction.commit()
                line_iter = iter(source.open(s.BULK_INDEX_FN, 'r'))
                offset = 0
                while 1:
                    try:
                        line = line_iter.next()
                        offset += 1
                        #print 'line:',line.strip()
                        if line.lower().startswith('series '):
                            line_iter.next()
                            offset += 1
                            break
                    except StopIteration:
                        break
                total -= offset
                i = 0
                just_skipped = False
                data = csv.DictReader(line_iter, delimiter=';')
                for row in data:
                    i += 1
                    if skip_to and i < skip_to:
                        if not just_skipped:
                            print
                        print '\rSkipping from %s to %s...' % (i, skip_to),
                        sys.stdout.flush()
                        just_skipped = True
                        continue
                    elif just_skipped:
                        just_skipped = False
                        print
                        
                    DataSourceFile.objects.filter(id=dsfile.id).update(
                        downloaded=True,
                        complete=False,
                        total_lines=total,
                        total_lines_complete=i,
                        percent=i/float(total)*100,
                    )
                    if not i % 10:
                        django.db.transaction.commit()
                        
                    row = dict(
                        (
                            (k or '').strip().lower().replace(' ', '_'),
                            (v or '').strip()
                        )
                        for k,v in row.iteritems()
                    )
                    if not row.get('file'):
                        continue
                    print '\rLoading %s %.02f%% (%i of %i)...' % (row.get('file'), i/float(total)*100, i, total),
                    sys.stdout.flush()
                    row['id'] = row['file'].split('\\')[-1].split('.')[0]
                    section_fn = row['file'] # FRED2_csv_2/data/4/4BIGEURORECP.csv
                    del row['file']
                    if row['last_updated']:
                        row['last_updated'] = dateutil.parser.parse(row['last_updated'])
                        row['last_updated'] = date(row['last_updated'].year, row['last_updated'].month, row['last_updated'].day)
                    #print row
                    series, _ = Series.objects.get_or_create(id=row['id'], defaults=row)
                    series.last_updated = row['last_updated']
                    series_min_date = series.min_date
                    series_max_date = series.max_date
                    prior_series_dates = set(series.data.all().values_list('date', flat=True))
                    
                    if series.max_date and series.last_updated > (series.max_date - timedelta(days=s.LAST_UPDATE_DAYS)):
                        continue
                    elif not section_fn.endswith('.csv'):
                        continue
                    
                    section_fn = 'FRED2_csv_2/data/' + section_fn.replace('\\', '/')
                    #print 'section_fn:',section_fn
                    lines = source.open(section_fn, 'r').readlines()
                    #last_data = None
                    last_data_date = None
                    last_data_value = None
                    total2 = len(source.open(section_fn, 'r').readlines())
                    i2 = 0
                    if s.EXPAND_DATA_TO_DAYS:
                        print
                    series_data_pending = []
                    for row in csv.DictReader(source.open(section_fn, 'r')):
                        i2 += 1
                        if s.EXPAND_DATA_TO_DAYS:
                            print '\r\tLine %.02f%% (%i of %i)' % (i2/float(total2)*100, i2, total2),
                        sys.stdout.flush()
                        row['date'] = dateutil.parser.parse(row['DATE'])
                        row['date'] = date(row['date'].year, row['date'].month, row['date'].day)
                        
#                        series_min_date = min(series_min_date or row['date'], row['date'])
#                        series_max_date = max(series_max_date or row['date'], row['date'])
                        
                        del row['DATE']
                        try:
                            row['value'] = float(row['VALUE'])
                        except ValueError:
                            print
                            print 'Invalid value: "%s"' % (row['VALUE'],)
                            sys.stdout.flush()
                            continue
                        del row['VALUE']
                        #print row
                        
                        if s.EXPAND_DATA_TO_DAYS and last_data_date:
                            intermediate_days = (row['date'] - last_data_date).days
                            #print 'Expanding data to %i intermediate days...' % (intermediate_days,)
                            #sys.stdout.flush()
                            #Data.objects.bulk_create([
                            series_data_pending.extend([
                                Data(series=series, date=last_data_date+timedelta(days=_days), value=last_data_value)
                                for _days in xrange(1, intermediate_days)
                                if (last_data_date+timedelta(days=_days)) not in prior_series_dates
                            ])
                        
                        #data, _ = Data.objects.get_or_create(series=series, date=row['date'], defaults=row)
                        if row['date'] not in prior_series_dates:
                            data = Data(series=series, date=row['date'], value=row['value'])
                            series_data_pending.append(data)
                        #data.save()
                        last_data_date = row['date']
                        last_data_value = row['value']
                    if series_data_pending:
                        Data.objects.bulk_create(series_data_pending)
#                    print '\r\tLine %.02f%% (%i of %i)' % (100, i2, total2),
#                    print
                    series.last_refreshed = date.today()
                    series.save()
                        
                    # Cleanup.
                    django.db.transaction.commit()
                    Series.objects.update()
                    Data.objects.update()
                    gc.collect()
                    
                DataSourceFile.objects.filter(id=dsfile.id).update(
                    complete=True,
                    downloaded=True,
                    total_lines=total,
                    total_lines_complete=total,
                    percent=100,
                )
                    
            else:
                #TODO:use API to download data for each series_id individually
                #e.g. http://api.stlouisfed.org/fred/series/observations?series_id=DEXUSEU&api_key=<api_key>
                #TODO:check for revised values using output_type?
                #http://api.stlouisfed.org/docs/fred/series_observations.html#output_type
                if force:
                    if ids:
                        q = Series.objects.all()
                    else:
                        q = Series.objects.get_loadable()
                else:
                    q = Series.objects.get_stale(days=30)
                
                if ids:
                    q = q.filter(id__in=ids)
                fred.key(s.API_KEY)
                i = 0
                total = q.count()
                print '%i series found.' % (total,)
                for series in q.iterator():
                    i += 1
                    print '\rImporting %i of %i' % (i, total),
                    sys.stdout.flush()
                    observation_start = None
                    if series.max_date:
                        observation_start = series.max_date - timedelta(days=7)
                    
                    try:
                        series_info = fred.series(series.id)['seriess'][0]
                    except KeyError:
                        print>>sys.stderr, 'Series %s is missing seriess: %s' % (series.id, fred.series(series.id),)
                        continue
                    except Exception as e:
                        print>>sys.stderr, 'Error on %s: %s' % (series.id, e,)
                        continue
                        
                    #print 'series_info:',series_info
                    last_updated = series_info['last_updated'].strip()
                    series.last_updated = dateutil.parser.parse(last_updated) if last_updated else None
                    series.popularity = series_info['popularity']
                    series.save()
                    
                    try:
                        series_data = fred.observations(
                            series.id,
                            observation_start=observation_start)
                    except ValueError as e:
                        print>>sys.stderr, e
                        continue
                        
                    for data in series_data['observations']:
                        #print series, data['date'], data['value']
                        
                        try:
                            value = float(data['value'])
                        except (ValueError, TypeError) as e:
                            print>>sys.stderr, e
                            continue
                            
                        dt = date(*map(int, data['date'].split('-')))
                        data, created = Data.objects.get_or_create(
                            series=series,
                            date=dt,
                            defaults=dict(value=value))
                        if not created:
                            data.value = value
                            data.save()
                            
                    series = Series.objects.get(id=series.id)
                    if series.last_updated:
                        most_recent_past_date = series.data.filter(date__lte=date.today()).aggregate(Max('date'))['date__max']
                        threshold = series.last_updated - timedelta(days=series.days)
#                        print
#                        print 'most_recent_past_date:',most_recent_past_date
#                        print 'last_updated:',series.last_updated
#                        print 'threshold:',threshold
                        if most_recent_past_date:
                            if series.frequency == c.QUARTERLY and most_recent_past_date.day == 1:
                                #TODO: Is this a safe assumption? Might not matter for series without future data.
                                series.date_is_start = True
                            elif most_recent_past_date >= threshold:
                                series.date_is_start = False
                            else:
                                series.date_is_start = True
                    series.last_refreshed = date.today()
                    series.save()
                    
                    if force:
                        series.data.all().update(start_date_inclusive=None, end_date_inclusive=None)
                        
                    missing_dates = series.data.filter(Q(start_date_inclusive__isnull=True)|Q(end_date_inclusive__isnull=True))
                    print 'Updating %i date ranges.' % (missing_dates.count(),)
                    for _ in missing_dates.iterator():
                        _.set_date_range()
                        _.save()
                    
                    django.db.transaction.commit()
                print
        finally:
            #print "Committing..."
            settings.DEBUG = tmp_debug
            django.db.transaction.commit()
            django.db.transaction.leave_transaction_management()
Example #23
    def test_fred_series_observations(self):
        fred.key('ohai')
        fred.observations("AAA")
        expected = 'https://api.stlouisfed.org/fred/series/observations'
        params = {'api_key': 'ohai', 'series_id': 'AAA', 'file_type': 'json'}
        self.get.assert_called_with(expected, params=params)
Example #24
def download_ids(api_key:str, sleep_time:int=60, rate_limit:int=100) -> None:
    """
    Downloading all time series in the provided JSON file.
    """ 
    fred.key(api_key)
    #num_requests = 0
    tot_downloaded = 0
    num_files_written = 0
    list_json = []
    request_time_stamps = []
    #curr_dir = f"dir{tot_downloaded // cfg.source.files_per_folder :04d}/"

    dirs = glob.glob(cfg.source.path.FRED.meta + "/*")
    for d in dirs:
        file_names = glob.glob(d + "/*")
        for fname in file_names:
            with open(fname, "r") as fp:
                ids_meta = json.load(fp)

            for id_meta in ids_meta:
                try:
                    observations = fred.observations(id_meta["id"])
                    request_time_stamps.append(time.time())
                    #num_requests += 1
                    ts = {
                        "id" : id_meta["id"],
                        "source" : id_meta["source"],
                        "node_id" : id_meta["node_id"],
                        "category_name" : id_meta["category_name"],
                        "parent_id" : id_meta["parent_id"],
                        "frequency" : id_meta["frequency"],
                        "observations" : [{"date" : obs["date"], "value" : obs["value"]} for obs in observations["observations"]]
                    }

                    tot_downloaded += 1
                    list_json.append(ts)
                    if len(list_json) > cfg.source.samples_per_json:
                        filename = f"raw_{num_files_written:>06}.json"
                        if num_files_written % cfg.source.files_per_folder == 0:
                            curr_dir = f"dir{num_files_written // cfg.source.files_per_folder :04d}/"
                            os.makedirs(os.path.join(cfg.source.path.FRED.raw, curr_dir), exist_ok=True)
                        with open(os.path.join(*[cfg.source.path.FRED.raw, curr_dir, filename]), "w") as fp:
                            json.dump(list_json, fp, sort_keys=True, indent=4, separators=(",", ": "))
                        
                        with open(os.path.join(cfg.source.path.FRED.meta, "ids_downloaded.txt"), "a") as fp:
                            for j in list_json:
                                fp.write(j["id"])
                                fp.write("\n")

                        num_files_written += 1
                        list_json = []

                    if tot_downloaded % 10000 == 0:
                        logger.info(f"Downloaded {tot_downloaded} time series.")

                except Exception as e:
                    logger.info(f"Failed to download id {id_meta['id']} from fname {fname}.")
                    logger.warning(e)

                if len(request_time_stamps) > rate_limit:
                    # Crude sliding-window throttle: once more than rate_limit
                    # requests have been made, sleep for the gap between the two
                    # oldest timestamps if the window is still too dense.
                    first = request_time_stamps.pop(0)
                    if time.time() - first < sleep_time:
                        time.sleep(request_time_stamps[0] - first)
      
        logger.info(f"Written files in directory {d} and currently have {tot_downloaded:>6} time series saved")


    filename = f"raw_{num_files_written:>06}.json"
    if num_files_written % cfg.source.files_per_folder == 0:
        curr_dir = f"dir{num_files_written // cfg.source.files_per_folder :04d}/"
        os.makedirs(os.path.join(cfg.source.path.FRED.raw, curr_dir), exist_ok=True)
    with open(os.path.join(*[cfg.source.path.FRED.raw, curr_dir, filename]), "w") as fp:
        json.dump(list_json, fp, sort_keys=True, indent=4, separators=(",", ": "))
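The in-loop throttle above keeps the last rate_limit request timestamps and naps when the window gets too dense. The same pattern as a self-contained sketch:

import time

class SlidingWindowThrottle:
    """Allow at most `limit` calls per `window` seconds (sketch)."""

    def __init__(self, limit=100, window=60):
        self.limit = limit
        self.window = window
        self.stamps = []

    def wait(self):
        now = time.time()
        # Keep only timestamps still inside the window.
        self.stamps = [t for t in self.stamps if now - t < self.window]
        if len(self.stamps) >= self.limit:
            # Sleep until the oldest call leaves the window.
            time.sleep(self.stamps[0] + self.window - now)
        self.stamps.append(time.time())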
Example #25
def analysis_observations_child(observations_id):
    observations = fred.observations(observations_id)["observations"]
    for item_observations in observations:
        item_observations["id"] = observations_id
        insert_sql('observations', item_observations)
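insert_sql is not shown; a minimal sqlite3-based stand-in, offered as an assumption about its contract (a table name plus a dict of column values):

import sqlite3

# Hypothetical stand-in for the insert_sql helper used above.
def insert_sql(table, row, db_path='fred.db'):
    cols = ', '.join(row.keys())
    marks = ', '.join('?' for _ in row)
    with sqlite3.connect(db_path) as conn:
        conn.execute('INSERT INTO %s (%s) VALUES (%s)' % (table, cols, marks),
                     list(row.values()))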
Example #26
    def test_fred_series_observations(self):
        fred.key('ohai')
        fred.observations()
        expected = 'http://api.stlouisfed.org/fred/series/observations'
        params = {'api_key': 'ohai'}
        self.get.assert_called_with(expected, params=params)
Example #27
def fredCategory(symbol):
    # Despite the name, this returns the raw observations list for a series.
    return fred.observations(symbol)['observations']
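A hypothetical call; the key and series id are placeholders:

fred.key('your-api-key')
obs = fredCategory('DGS10')  # list of observation dicts
latest = obs[-1]
print(latest['date'], latest['value'])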