Example no. 1
0
def get_series_metadata(series):
    """Return a two-column table (labels vs. values) describing a FRED series.

    Assumes ``fred.series`` returns a dict with a 'seriess' list whose first
    entry holds the metadata -- TODO confirm against the fred client in use.
    """
    info = fred.series(series)['seriess'][0]
    labels = ['Title:', 'Units:', 'Frequency:']
    values = [info['title'], info['units'], info['frequency']]
    return {'Table Info': labels, '': values}
Example no. 2
0
def about_series(series):
    """Return a two-column table of detailed metadata for a FRED series.

    Keys mirror get_series_metadata: 'Series Info:' maps to the display
    labels, '' maps to the corresponding values in the same order.
    """
    meta = fred.series(series)['seriess'][0]
    # (display label, metadata key) pairs, in presentation order.
    fields = [
        ('ID:', 'id'),
        ('Title:', 'title'),
        ('Obs. Start:', 'observation_start'),
        ('Obs. End:', 'observation_end'),
        ('Frequency:', 'frequency'),
        ('Units:', 'units'),
        ('Seasonal Adjustment:', 'seasonal_adjustment'),
        ('Last Updated:', 'last_updated'),
    ]
    return {
        'Series Info:': [label for label, _ in fields],
        '': [meta[key] for _, key in fields],
    }
def downloadDataFromFred_fillMetadata(ticker):
	"""Fetch series metadata for *ticker* from FRED and write it to the DB.

	Returns 'ok' on success, or 'ERROR: <message>' when the API reports an
	error. Database exceptions from the parse/write helper propagate to the
	caller unchanged.
	"""
	series_data = fred.series(ticker)
	log.info('\tFinding Metadata for %s', ticker)
	if 'error_message' in series_data:
		log.warning('\tError in Finding Metadata for %s', ticker)
		# BUG FIX: previously logged `historical_data['error_message']`,
		# but `historical_data` is undefined here -- it raised NameError
		# whenever the error path was hit.
		log.warning('\tError: %s', series_data['error_message'])
		status = 'ERROR: ' + series_data['error_message']
	else:
		log.info('\tFound Metadata for %s... Writing to DB', ticker)
		# Read Data
		metadata = series_data['seriess'][0]
		# Write Data to Database; a bare `try/except: raise` added nothing,
		# so exceptions are simply allowed to propagate.
		downloadDataFromFred_fillMetadata_parseSeriesData(ticker, metadata)
		# Update Data in CSV
		status = 'ok'
	return status
def seriesFreq(series):
    """Return the lower-cased base frequency of a FRED series.

    For compound values such as "quarterly, end of period" only the text
    before the first comma is returned.
    """
    frequency = fred.series(series)['seriess'][0]['frequency'].lower()
    # partition() yields the whole string as `base` when no comma exists,
    # matching the original find()/slice behavior exactly.
    base, _, _ = frequency.partition(",")
    return base
def seriesTitle(series):
    """Return the human-readable title of a FRED series."""
    metadata = fred.series(series)['seriess'][0]
    return metadata['title']
Example no. 6
0
 def test_fred_series_release(self):
     # fred.series(releases=True) must hit the /fred/series/release
     # endpoint with the configured API key and json file type.
     fred.key('abc')
     fred.series(releases=True)
     self.get.assert_called_with(
         'http://api.stlouisfed.org/fred/series/release',
         params={'api_key': 'abc', 'file_type': 'json'},
     )
Example no. 7
0
    def refresh(self, bulk=False, skip_to=None, fn=None, no_download=False, ids=None, force=False, **kwargs):
        """
        Reads the associated API and saves data to tables.

        Python 2 implementation (print statements, dict.iteritems, xrange).

        With ``bulk=True`` the FRED bulk CSV zip archive is streamed and
        loaded, resumable via ``skip_to`` and the ``DataSourceFile``
        progress bookkeeping; otherwise each stale ``Series`` is refreshed
        through the FRED HTTP API.  ``force`` reloads even fresh series;
        ``ids`` restricts which series are processed.  ``fn`` names a
        previously downloaded bulk file and ``no_download`` skips the
        download step.  Progress is printed to stdout.
        """
        
        if skip_to:
            skip_to = int(skip_to)
        
        # Disable DEBUG so Django does not accumulate a query log, and take
        # manual control of transactions (pre-1.8 Django API); commits are
        # issued explicitly below.
        tmp_debug = settings.DEBUG
        settings.DEBUG = False
        django.db.transaction.enter_transaction_management()
        django.db.transaction.managed(True)
        
        try:
            if bulk:
                local_fn = self.download_bulk_data(fn=fn, no_download=no_download)
                dsfile, _ = DataSourceFile.objects.get_or_create(name=local_fn)
                if dsfile.complete:
                    # This bulk file was already fully imported.
                    return
                
                # Process CSV.
                print 'Reading file...'
                sys.stdout.flush()
                source = zipfile.ZipFile(local_fn, 'r')
                if dsfile.total_lines_complete:
                    # Resume a partially processed file.
                    total = dsfile.total_lines
                    if not skip_to:
                        skip_to = dsfile.total_lines_complete
                else:
                    total = len(source.open(s.BULK_INDEX_FN, 'r').readlines())
                    DataSourceFile.objects.filter(id=dsfile.id).update(
                        complete=False,
                        total_lines=total,
                        total_lines_complete=0,
                        percent=0,
                    )
                django.db.transaction.commit()
                line_iter = iter(source.open(s.BULK_INDEX_FN, 'r'))
                offset = 0
                # Skip the index file's preamble: everything up to and
                # including the header line starting with 'series ', plus
                # one more line.
                while 1:
                    try:
                        line = line_iter.next()
                        offset += 1
                        #print 'line:',line.strip()
                        if line.lower().startswith('series '):
                            line_iter.next()
                            offset += 1
                            break
                    except StopIteration:
                        break
                total -= offset
                i = 0
                just_skipped = False
                data = csv.DictReader(line_iter, delimiter=';')
                for row in data:
                    i += 1
                    if skip_to and i < skip_to:
                        if not just_skipped:
                            print
                        print '\rSkipping from %s to %s...' % (i, skip_to),
                        sys.stdout.flush()
                        just_skipped = True
                        continue
                    elif just_skipped:
                        just_skipped = False
                        print
                        
                    # Record progress so an interrupted run can resume.
                    DataSourceFile.objects.filter(id=dsfile.id).update(
                        downloaded=True,
                        complete=False,
                        total_lines=total,
                        total_lines_complete=i,
                        percent=i/float(total)*100,
                    )
                    if not i % 10:
                        django.db.transaction.commit()
                        
                    # Normalize header keys to snake_case and strip values.
                    row = dict(
                        (
                            (k or '').strip().lower().replace(' ', '_'),
                            (v or '').strip()
                        )
                        for k,v in row.iteritems()
                    )
                    if not row.get('file'):
                        continue
                    print '\rLoading %s %.02f%% (%i of %i)...' % (row.get('file'), i/float(total)*100, i, total),
                    sys.stdout.flush()
                    row['id'] = row['file'].split('\\')[-1].split('.')[0]
                    section_fn = row['file'] # FRED2_csv_2/data/4/4BIGEURORECP.csv
                    del row['file']
                    if row['last_updated']:
                        row['last_updated'] = dateutil.parser.parse(row['last_updated'])
                        row['last_updated'] = date(row['last_updated'].year, row['last_updated'].month, row['last_updated'].day)
                    #print row
                    series, _ = Series.objects.get_or_create(id=row['id'], defaults=row)
                    series.last_updated = row['last_updated']
                    series_min_date = series.min_date
                    series_max_date = series.max_date
                    prior_series_dates = set(series.data.all().values_list('date', flat=True))
                    
                    # Skip series whose stored data is already at least as
                    # fresh as the bulk file claims, and non-CSV entries.
                    if series.max_date and series.last_updated > (series.max_date - timedelta(days=s.LAST_UPDATE_DAYS)):
                        continue
                    elif not section_fn.endswith('.csv'):
                        continue
                    
                    section_fn = 'FRED2_csv_2/data/' + section_fn.replace('\\', '/')
                    #print 'section_fn:',section_fn
                    lines = source.open(section_fn, 'r').readlines()
                    #last_data = None
                    last_data_date = None
                    last_data_value = None
                    total2 = len(source.open(section_fn, 'r').readlines())
                    i2 = 0
                    if s.EXPAND_DATA_TO_DAYS:
                        print
                    series_data_pending = []
                    for row in csv.DictReader(source.open(section_fn, 'r')):
                        i2 += 1
                        if s.EXPAND_DATA_TO_DAYS:
                            print '\r\tLine %.02f%% (%i of %i)' % (i2/float(total2)*100, i2, total2),
                        sys.stdout.flush()
                        row['date'] = dateutil.parser.parse(row['DATE'])
                        row['date'] = date(row['date'].year, row['date'].month, row['date'].day)
                        
#                        series_min_date = min(series_min_date or row['date'], row['date'])
#                        series_max_date = max(series_max_date or row['date'], row['date'])
                        
                        del row['DATE']
                        try:
                            row['value'] = float(row['VALUE'])
                        except ValueError:
                            # Non-numeric observation (FRED uses '.' for
                            # missing values) -- report and skip.
                            print
                            print 'Invalid value: "%s"' % (row['VALUE'],)
                            sys.stdout.flush()
                            continue
                        del row['VALUE']
                        #print row
                        
                        if s.EXPAND_DATA_TO_DAYS and last_data_date:
                            # Forward-fill the previous value across the gap
                            # so every intermediate day has a Data row.
                            intermediate_days = (row['date'] - last_data_date).days
                            #print 'Expanding data to %i intermediate days...' % (intermediate_days,)
                            #sys.stdout.flush()
                            #Data.objects.bulk_create([
                            series_data_pending.extend([
                                Data(series=series, date=last_data_date+timedelta(days=_days), value=last_data_value)
                                for _days in xrange(1, intermediate_days)
                                if (last_data_date+timedelta(days=_days)) not in prior_series_dates
                            ])
                        
                        #data, _ = Data.objects.get_or_create(series=series, date=row['date'], defaults=row)
                        if row['date'] not in prior_series_dates:
                            data = Data(series=series, date=row['date'], value=row['value'])
                            series_data_pending.append(data)
                        #data.save()
                        last_data_date = row['date']
                        last_data_value = row['value']
                    if series_data_pending:
                        Data.objects.bulk_create(series_data_pending)
#                    print '\r\tLine %.02f%% (%i of %i)' % (100, i2, total2),
#                    print
                    series.last_refreshed = date.today()
                    series.save()
                        
                    # Cleanup.
                    django.db.transaction.commit()
                    Series.objects.update()
                    Data.objects.update()
                    gc.collect()
                    
                DataSourceFile.objects.filter(id=dsfile.id).update(
                    complete=True,
                    downloaded=True,
                    total_lines=total,
                    total_lines_complete=total,
                    percent=100,
                )
                    
            else:
                #TODO:use API to download data for each series_id individually
                #e.g. http://api.stlouisfed.org/fred/series/observations?series_id=DEXUSEU&api_key=<api_key>
                #TODO:check for revised values using output_type?
                #http://api.stlouisfed.org/docs/fred/series_observations.html#output_type
                if force:
                    if ids:
                        q = Series.objects.all()
                    else:
                        q = Series.objects.get_loadable()
                else:
                    q = Series.objects.get_stale(days=30)
                
                if ids:
                    q = q.filter(id__in=ids)
                fred.key(s.API_KEY)
                i = 0
                total = q.count()
                print '%i series found.' % (total,)
                for series in q.iterator():
                    i += 1
                    print '\rImporting %i of %i' % (i, total),
                    sys.stdout.flush()
                    observation_start = None
                    if series.max_date:
                        # Re-fetch a one-week overlap to pick up revisions.
                        observation_start = series.max_date - timedelta(days=7)
                    
                    try:
                        series_info = fred.series(series.id)['seriess'][0]
                    except KeyError:
                        print>>sys.stderr, 'Series %s is missing seriess: %s' % (series.id, fred.series(series.id),)
                        continue
                    except Exception as e:
                        print>>sys.stderr, 'Error on %s: %s' % (series.id, e,)
                        continue
                        
                    #print 'series_info:',series_info
                    last_updated = series_info['last_updated'].strip()
                    series.last_updated = dateutil.parser.parse(last_updated) if last_updated else None
                    series.popularity = series_info['popularity']
                    series.save()
                    
                    try:
                        series_data = fred.observations(
                            series.id,
                            observation_start=observation_start)
                    except ValueError as e:
                        print>>sys.stderr, e
                        continue
                        
                    for data in series_data['observations']:
                        #print series, data['date'], data['value']
                        
                        try:
                            value = float(data['value'])
                        except (ValueError, TypeError) as e:
                            # Missing observations ('.') fail float().
                            print>>sys.stderr, e
                            continue
                            
                        dt = date(*map(int, data['date'].split('-')))
                        data, created = Data.objects.get_or_create(
                            series=series,
                            date=dt,
                            defaults=dict(value=value))
                        if not created:
                            data.value = value
                            data.save()
                            
                    series = Series.objects.get(id=series.id)
                    if series.last_updated:
                        most_recent_past_date = series.data.filter(date__lte=date.today()).aggregate(Max('date'))['date__max']
                        threshold = series.last_updated - timedelta(days=series.days)
#                        print
#                        print 'most_recent_past_date:',most_recent_past_date
#                        print 'last_updated:',series.last_updated
#                        print 'threshold:',threshold
                        if most_recent_past_date:
                            if series.frequency == c.QUARTERLY and most_recent_past_date.day == 1:
                                #TODO: Is this a safe assumption? Might not matter for series without future data.
                                series.date_is_start = True
                            elif most_recent_past_date >= threshold:
                                series.date_is_start = False
                            else:
                                series.date_is_start = True
                    series.last_refreshed = date.today()
                    series.save()
                    
                    if force:
                        # Clear ranges so they are all recomputed below.
                        series.data.all().update(start_date_inclusive=None, end_date_inclusive=None)
                        
                    missing_dates = series.data.filter(Q(start_date_inclusive__isnull=True)|Q(end_date_inclusive__isnull=True))
                    print 'Updating %i date ranges.' % (missing_dates.count(),)
                    for _ in missing_dates.iterator():
                        _.set_date_range()
                        _.save()
                    
                    django.db.transaction.commit()
                print
        finally:
            #print "Committing..."
            settings.DEBUG = tmp_debug
            django.db.transaction.commit()
            django.db.transaction.leave_transaction_management()
Example no. 8
0
 def test_fred_series_release(self):
     # fred.series(releases=True) must hit the /fred/series/release
     # endpoint (https) with the configured API key and json file type.
     fred.key('abc')
     fred.series(releases=True)
     self.get.assert_called_with(
         'https://api.stlouisfed.org/fred/series/release',
         params={'api_key': 'abc', 'file_type': 'json'},
     )
Example no. 9
0
 def test_fred_series(self):
     # A plain fred.series() call must hit the /fred/series endpoint with
     # only the API key as a parameter.
     fred.key('abc')
     fred.series()
     self.get.assert_called_with(
         'http://api.stlouisfed.org/fred/series',
         params={'api_key': 'abc'},
     )
Example no. 10
0
    def refresh(self,
                bulk=False,
                skip_to=None,
                fn=None,
                no_download=False,
                ids=None,
                force=False,
                **kwargs):
        """
        Reads the associated API and saves data to tables.

        With ``bulk=True`` the FRED bulk CSV zip archive is streamed and
        loaded, resumable via ``skip_to`` and the ``DataSourceFile``
        progress bookkeeping; otherwise each stale ``Series`` is refreshed
        through the FRED HTTP API.  ``force`` reloads even fresh series;
        ``ids`` restricts which series are processed.  ``fn`` names a
        previously downloaded bulk file and ``no_download`` skips the
        download step.  Progress is printed to stdout.

        This body fixes Python 2 remnants left by a partial 2-to-3
        conversion: ``iterator.next()``, ``dict.iteritems()``, bare
        ``print`` statements (no-op expressions in Python 3), ``\\r``
        progress lines that lost their trailing comma, and zip members
        opened in binary mode but consumed as text.
        """
        # Local import: in Python 3, ZipFile.open returns a binary file;
        # csv/str processing below needs a text wrapper.
        import io

        if skip_to:
            skip_to = int(skip_to)

        # Disable DEBUG so Django does not accumulate a query log, and take
        # manual control of transactions (pre-1.8 Django API); commits are
        # issued explicitly below.
        tmp_debug = settings.DEBUG
        settings.DEBUG = False
        django.db.transaction.enter_transaction_management()
        django.db.transaction.managed(True)

        try:
            if bulk:
                local_fn = self.download_bulk_data(fn=fn,
                                                   no_download=no_download)
                dsfile, _ = DataSourceFile.objects.get_or_create(name=local_fn)
                if dsfile.complete:
                    # This bulk file was already fully imported.
                    return

                # Process CSV.
                print('Reading file...')
                sys.stdout.flush()
                source = zipfile.ZipFile(local_fn, 'r')
                if dsfile.total_lines_complete:
                    # Resume a partially processed file.
                    total = dsfile.total_lines
                    if not skip_to:
                        skip_to = dsfile.total_lines_complete
                else:
                    total = len(source.open(s.BULK_INDEX_FN, 'r').readlines())
                    DataSourceFile.objects.filter(id=dsfile.id).update(
                        complete=False,
                        total_lines=total,
                        total_lines_complete=0,
                        percent=0,
                    )
                django.db.transaction.commit()
                # BUG FIX: wrap the binary zip stream so lines are str, not
                # bytes (startswith('series ') and csv need text).
                line_iter = iter(
                    io.TextIOWrapper(source.open(s.BULK_INDEX_FN, 'r')))
                offset = 0
                # Skip the index file's preamble: everything up to and
                # including the header line starting with 'series ', plus
                # one more line.
                while True:
                    try:
                        # BUG FIX: `iterator.next()` is Python 2 only; use
                        # the builtin next().
                        line = next(line_iter)
                        offset += 1
                        if line.lower().startswith('series '):
                            next(line_iter)
                            offset += 1
                            break
                    except StopIteration:
                        break
                total -= offset
                i = 0
                just_skipped = False
                data = csv.DictReader(line_iter, delimiter=';')
                for row in data:
                    i += 1
                    if skip_to and i < skip_to:
                        if not just_skipped:
                            # BUG FIX: bare `print` is a no-op expression
                            # in Python 3; call the function.
                            print()
                        # end='' keeps the '\r' line overwriting itself
                        # (the Python 2 trailing comma behavior).
                        print('\rSkipping from %s to %s...' % (i, skip_to),
                              end='')
                        sys.stdout.flush()
                        just_skipped = True
                        continue
                    elif just_skipped:
                        just_skipped = False
                        print()

                    # Record progress so an interrupted run can resume.
                    DataSourceFile.objects.filter(id=dsfile.id).update(
                        downloaded=True,
                        complete=False,
                        total_lines=total,
                        total_lines_complete=i,
                        percent=i / float(total) * 100,
                    )
                    if not i % 10:
                        django.db.transaction.commit()

                    # Normalize header keys to snake_case and strip values.
                    # BUG FIX: dict.iteritems() does not exist in Python 3.
                    row = dict(((k or '').strip().lower().replace(' ', '_'),
                                (v or '').strip()) for k, v in row.items())
                    if not row.get('file'):
                        continue
                    print('\rLoading %s %.02f%% (%i of %i)...' %
                          (row.get('file'), i / float(total) * 100, i, total),
                          end='')
                    sys.stdout.flush()
                    row['id'] = row['file'].split('\\')[-1].split('.')[0]
                    section_fn = row[
                        'file']  # FRED2_csv_2/data/4/4BIGEURORECP.csv
                    del row['file']
                    if row['last_updated']:
                        row['last_updated'] = dateutil.parser.parse(
                            row['last_updated'])
                        row['last_updated'] = date(row['last_updated'].year,
                                                   row['last_updated'].month,
                                                   row['last_updated'].day)
                    series, _ = Series.objects.get_or_create(id=row['id'],
                                                             defaults=row)
                    series.last_updated = row['last_updated']
                    prior_series_dates = set(series.data.all().values_list(
                        'date', flat=True))

                    # Skip series whose stored data is already at least as
                    # fresh as the bulk file claims, and non-CSV entries.
                    if series.max_date and series.last_updated > (
                            series.max_date -
                            timedelta(days=s.LAST_UPDATE_DAYS)):
                        continue
                    elif not section_fn.endswith('.csv'):
                        continue

                    section_fn = 'FRED2_csv_2/data/' + section_fn.replace(
                        '\\', '/')
                    last_data_date = None
                    last_data_value = None
                    total2 = len(source.open(section_fn, 'r').readlines())
                    i2 = 0
                    if s.EXPAND_DATA_TO_DAYS:
                        print()
                    series_data_pending = []
                    # BUG FIX: text-wrap the binary zip member for csv.
                    for row2 in csv.DictReader(
                            io.TextIOWrapper(source.open(section_fn, 'r'))):
                        i2 += 1
                        if s.EXPAND_DATA_TO_DAYS:
                            print('\r\tLine %.02f%% (%i of %i)' %
                                  (i2 / float(total2) * 100, i2, total2),
                                  end='')
                        sys.stdout.flush()
                        row2['date'] = dateutil.parser.parse(row2['DATE'])
                        row2['date'] = date(row2['date'].year,
                                            row2['date'].month,
                                            row2['date'].day)

                        del row2['DATE']
                        try:
                            row2['value'] = float(row2['VALUE'])
                        except ValueError:
                            # Non-numeric observation (FRED uses '.' for
                            # missing values) -- report and skip.
                            print()
                            print('Invalid value: "%s"' % (row2['VALUE'], ))
                            sys.stdout.flush()
                            continue
                        del row2['VALUE']

                        if s.EXPAND_DATA_TO_DAYS and last_data_date:
                            # Forward-fill the previous value across the gap
                            # so every intermediate day has a Data row.
                            intermediate_days = (row2['date'] -
                                                 last_data_date).days
                            series_data_pending.extend([
                                Data(series=series,
                                     date=last_data_date +
                                     timedelta(days=_days),
                                     value=last_data_value)
                                for _days in range(1, intermediate_days)
                                if (last_data_date + timedelta(days=_days)
                                    ) not in prior_series_dates
                            ])

                        if row2['date'] not in prior_series_dates:
                            data = Data(series=series,
                                        date=row2['date'],
                                        value=row2['value'])
                            series_data_pending.append(data)
                        last_data_date = row2['date']
                        last_data_value = row2['value']
                    if series_data_pending:
                        Data.objects.bulk_create(series_data_pending)

                    series.last_refreshed = date.today()
                    series.save()

                    # Cleanup.
                    django.db.transaction.commit()
                    Series.objects.update()
                    Data.objects.update()
                    gc.collect()

                DataSourceFile.objects.filter(id=dsfile.id).update(
                    complete=True,
                    downloaded=True,
                    total_lines=total,
                    total_lines_complete=total,
                    percent=100,
                )

            else:
                #TODO:use API to download data for each series_id individually
                #e.g. http://api.stlouisfed.org/fred/series/observations?series_id=DEXUSEU&api_key=<api_key>
                #TODO:check for revised values using output_type?
                #http://api.stlouisfed.org/docs/fred/series_observations.html#output_type
                if force:
                    if ids:
                        q = Series.objects.all()
                    else:
                        q = Series.objects.get_loadable()
                else:
                    q = Series.objects.get_stale(days=30)

                if ids:
                    q = q.filter(id__in=ids)
                fred.key(s.API_KEY)
                i = 0
                total = q.count()
                print('%i series found.' % (total, ))
                for series in q.iterator():
                    i += 1
                    sys.stdout.write('\rImporting %i of %i' % (i, total))
                    sys.stdout.flush()
                    observation_start = None
                    if series.max_date:
                        # Re-fetch a one-week overlap to pick up revisions.
                        observation_start = series.max_date - timedelta(days=7)

                    try:
                        series_info = fred.series(series.id)['seriess'][0]
                    except KeyError:
                        print('Series %s is missing seriess: %s' % (
                            series.id,
                            fred.series(series.id),
                        ),
                              file=sys.stderr)
                        continue
                    except Exception as e:
                        print('Error on %s: %s' % (
                            series.id,
                            e,
                        ),
                              file=sys.stderr)
                        continue

                    last_updated = series_info['last_updated'].strip()
                    series.last_updated = dateutil.parser.parse(
                        last_updated) if last_updated else None
                    series.popularity = series_info['popularity']
                    series.save()

                    try:
                        series_data = fred.observations(
                            series.id, observation_start=observation_start)
                    except ValueError as e:
                        print('Error getting observations: %s' % e,
                              file=sys.stderr)
                        continue

                    for data in series_data['observations']:

                        try:
                            value = float(data['value'])
                        except (ValueError, TypeError):
                            # Missing observations ('.') fail float().
                            print('Error converting to float: %s' %
                                  data['value'],
                                  file=sys.stderr)
                            continue

                        dt = date(*map(int, data['date'].split('-')))
                        data, created = Data.objects.get_or_create(
                            series=series, date=dt, defaults=dict(value=value))
                        if not created:
                            data.value = value
                            data.save()

                    series = Series.objects.get(id=series.id)
                    if series.last_updated:
                        most_recent_past_date = series.data.filter(
                            date__lte=date.today()).aggregate(
                                Max('date'))['date__max']
                        threshold = series.last_updated - timedelta(
                            days=series.days)
                        if most_recent_past_date:
                            if series.frequency == c.QUARTERLY and most_recent_past_date.day == 1:
                                #TODO: Is this a safe assumption? Might not matter for series without future data.
                                series.date_is_start = True
                            elif most_recent_past_date >= threshold:
                                series.date_is_start = False
                            else:
                                series.date_is_start = True
                    series.last_refreshed = date.today()
                    series.save()

                    if force:
                        # Clear ranges so they are all recomputed below.
                        series.data.all().update(start_date_inclusive=None,
                                                 end_date_inclusive=None)

                    missing_dates = series.data.filter(
                        Q(start_date_inclusive__isnull=True)
                        | Q(end_date_inclusive__isnull=True))
                    print('Updating %i date ranges.' %
                          (missing_dates.count(), ))
                    for _ in missing_dates.iterator():
                        _.set_date_range()
                        _.save()

                    django.db.transaction.commit()
                print()
        finally:
            settings.DEBUG = tmp_debug
            django.db.transaction.commit()
            django.db.transaction.leave_transaction_management()