# runner_QuandlAPI.py
# TODO:
# - Perform sanity checks on downloaded data
# - Decide how to handle Instr_Start_Date: it keeps getting set to Null by accident instead of falling back to the default
# EMF From...Import
from handle_DataSeries import EMF_DataSeries_Handle
from handle_CSV import EMF_CSV_Handle
from handle_QuandlAPI import EMF_QuandlAPI_Handle
from lib_EMF import TEMP_MODE
from lib_QuandlAPI import QuandlCSVColumns, QuandlEditableColumns
from lib_QuandlAPI import QuandlDnldCols, QuandlDnldColIdxs
from lib_QuandlAPI import USE_DEFAULT
from util_CSV import boolify
from util_EMF import get_EMF_settings
from util_QuandlAPI import codify_periodicity
from util_TimeSet import dt_epoch_to_str_Y_M_D, dt_str_Y_M_D_Junk_to_epoch
# System Import...As
import logging as log
class EMF_Quandl_Runner:
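    '''Runner that downloads the Quandl datasets listed in the instruction CSV
    and stores their data and metadata in the EMF database.'''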
    def __init__(self, hndl_DB, EMF_mode=TEMP_MODE):
        settings = get_EMF_settings(EMF_mode)
        self._hndl_CSV = EMF_CSV_Handle(settings['QuandlCSVLoc'],
                                        columnIndexes=QuandlCSVColumns,
                                        editableColumns=QuandlEditableColumns)
        self._hndl_DB = hndl_DB
    def __download_dataset_singleSeries(self,
                                        Q_DATABASE_CODE=None,
                                        Q_DATASET_CODE=None,
                                        Q_COLUMN_NUM=USE_DEFAULT,
                                        Q_COLLAPSE_INSTR=USE_DEFAULT,
                                        Q_TRANSFORM_INSTR=USE_DEFAULT,
                                        start_date=USE_DEFAULT):
        '''Download a single Quandl series and return its populated handle.

        TODO:
            Only download the data that is necessary (i.e. points after the
            latest insert) when checking for updates.
        '''
        # Set Quandl Query Parameters
        log.info('QUANDL: Downloading Quandl data for (Database|Dataset): ({0}|{1})'.format(Q_DATABASE_CODE, Q_DATASET_CODE))
        hndl_Qndl = EMF_QuandlAPI_Handle(Q_DATABASE_CODE, Q_DATASET_CODE)
        # Set Quandl Query Parameters / Column Index
        hndl_Qndl.Instr_Col_Idx = Q_COLUMN_NUM
        # Set Quandl Query Parameters / Collapse
        hndl_Qndl.Instr_Collapse = Q_COLLAPSE_INSTR
        # Set Quandl Query Parameters / Transform
        hndl_Qndl.Instr_Transform = Q_TRANSFORM_INSTR
        # Set Quandl Query Parameters / Start Date
        hndl_Qndl.Instr_Start_Date = start_date
        # Download Quandl Data
        hndl_Qndl.get_data()
        try:
            log.info('QUANDL: Found {0} points from {1} to {2}. Periodicity: {3}'
                     .format(hndl_Qndl.Data_Num_Points,
                             hndl_Qndl.Data_Earliest_Date,
                             hndl_Qndl.Data_Latest_Date,
                             hndl_Qndl.Quandl_Periodicity))
            if hndl_Qndl.Data_Num_Columns != 2 and Q_COLUMN_NUM == 1:
                log.warning('QUANDL: Found more than one data column. Ensure you have the right column.')
        except Exception:
            # The data attributes may be missing if the download failed; the
            # caller checks hndl_Qndl.error, so logging here is best-effort.
            pass
        # Return
        return hndl_Qndl
    def __store_dataset_metadata(self, hndl_Data, hndl_Qndl, CSV_Metadata):
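        '''Copy Quandl-reported and CSV-supplied metadata onto the data-series handle.'''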
        hndl_Data.last_refreshed = dt_str_Y_M_D_Junk_to_epoch(hndl_Qndl.Quandl_Latest_Refresh) # TODO: keep the time component instead of junking it
        hndl_Data.periodicity = codify_periodicity(hndl_Qndl.Data_Periodicity) # TODO: avoid decoding to a string only to re-encode
        hndl_Data.original_periodicity = codify_periodicity(hndl_Qndl.Quandl_Periodicity)
        #
        hndl_Data.category = CSV_Metadata['category_1']
        hndl_Data.subcategory = CSV_Metadata['sub_category_1']
        hndl_Data.category_meaning = CSV_Metadata['category_1_meaning']
        hndl_Data.is_categorical = boolify(CSV_Metadata['IS_CATEGORICAL'])
        hndl_Data.geography = CSV_Metadata['geography']
        #
        hndl_Data.Quandl_dataset = hndl_Qndl.Quandl_Dataset
        hndl_Data.Quandl_database = hndl_Qndl.Quandl_Database
    def download_CSV_datasets(self):
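        '''For each row of the instruction CSV: download the dataset from
        Quandl, store its series and metadata in the DB, and write the
        download results back to the CSV.'''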
        MetadataCols = ['geography',
                        'category_1',
                        'sub_category_1',
                        'category_1_meaning',
                        'IS_CATEGORICAL']
        MetadataColIdxs = [QuandlCSVColumns[x] for x in MetadataCols]
        # For Each Desired Dataset (Using CSV Reader)
        for row in self._hndl_CSV:
            # Download Quandl Data History and Metadata
            download_instr = dict([(name, row[idx]) for (name, idx) in zip(QuandlDnldCols, QuandlDnldColIdxs)])
            hndl_Qndl = self.__download_dataset_singleSeries(**download_instr)
            # For Each Desired Dataset / If No Error
            if hndl_Qndl.error is None:
                # For Each Desired Dataset / If No Error / Create Local Data Handle
                db_name = row[QuandlCSVColumns['db_name']]
                db_ticker = row[QuandlCSVColumns['db_ticker']]
                hndl_Data = EMF_DataSeries_Handle(self._hndl_DB, name=db_name, ticker=db_ticker, insertIfNot=True)
                # For Each Desired Dataset / If No Error / Read Input CSV Data
                metadata = dict([(name, row[idx]) for (name, idx) in zip(MetadataCols, MetadataColIdxs)])
                # For Each Desired Dataset / If No Error / Store Data History in DB
                hndl_Data.save_series_db(hndl_Qndl.dates, hndl_Qndl.values)
                # For Each Desired Dataset / If No Error / Store Metadata in DB
                self.__store_dataset_metadata(hndl_Data, hndl_Qndl, metadata)
                # For Each Desired Dataset / If No Error / Update CSV Variables
                quandl_metadata = {
                    'Q_COLUMN_NAME'   : hndl_Qndl.Data_Chosen_Column,
                    'Q_REFRESHED_AT'  : hndl_Qndl.Quandl_Latest_Refresh,
                    'Q_EARLIEST_DATE' : hndl_Qndl.Quandl_Earliest_Date,
                    'Q_LATEST_DATE'   : hndl_Qndl.Quandl_Latest_Date,
                    'Q_PERIODICITY'   : hndl_Qndl.Quandl_Periodicity,
                    'Q_DESCRIPTION'   : hndl_Qndl.Quandl_Description,
                    'Q_NAME'          : hndl_Qndl.Data_Chosen_Column, # NOTE: mirrors Q_COLUMN_NAME; may be intended to hold the dataset name
                    'NUM_COLUMNS'     : hndl_Qndl.Data_Num_Columns,
                    'NUM_POINTS'      : hndl_Qndl.Data_Num_Points,
                    'ERROR'           : hndl_Qndl.error
                }
            else:
                # For Each Desired Dataset / If Error / Update CSV Variables
                quandl_metadata = {
                    'ERROR' : hndl_Qndl.error
                }
            # For Each Desired Dataset / Write Results to CSV
            for (key, val) in quandl_metadata.iteritems():
                try:
                    str(val).decode('utf-8') # Probe for bytes that will not decode as UTF-8, to prevent writing bad values
                    self._hndl_CSV.change_current_row(val, columnName=key)
                except (UnicodeDecodeError, UnicodeEncodeError):
                    log.warning('QUANDL: Unicode error writing to csv')
                    log.warning('QUANDL: Row:{0} Col:{1}'.format(row, key))
        self._hndl_CSV.write_to_csv()
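
# Example usage (a rough sketch): the runner expects an already-open EMF
# database handle. The handle_DB module and EMF_DB_Handle name below are
# assumptions for illustration, not confirmed by this file.
#
# if __name__ == '__main__':
#     from handle_DB import EMF_DB_Handle  # hypothetical import
#     hndl_DB = EMF_DB_Handle()            # hypothetical constructor
#     runner = EMF_Quandl_Runner(hndl_DB)
#     runner.download_CSV_datasets()       # process every row in the instruction CSV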