def findAndStoreDataSeries(self, ticker):
    assert self.dataSeriesID is None  # Force a reset before setting (or use a new object)
    self.dataSeriesID = EM_DBHelp.retrieve_DataSeriesID(
                            self.db_conn, self.db_curs,
                            dataTicker=ticker, insertIfNot=False)
    self.dataPeriodicity = self.__getFromDB('int_data_periodicity')
def downloadDataFromFred_fillHistory_parseSeriesData(dataSeriesTicker, data):
    dataSeriesID = EM_DBHelp.retrieve_DataSeriesID(db_connection, db_cursor,
                        dataTicker=dataSeriesTicker, insertIfNot=True)
    sendToDB = lambda: \
        EM_DBHelp.update_DataSeriesMetaData(db_connection, db_cursor,
                                            columnName, value, seriesID=dataSeriesID)
    successfulInserts = 0
    unsuccessfulInserts = 0
    minObsDate = maxint
    maxObsDate = -maxint - 1

    # Update History
    for obs in data['observations']:
        if obs['value'] == '.':
            continue  # Skip unfilled FRED values
        date = EM_util.dtConvert_YYYY_MM_DDtoEpoch(obs['date'])
        value = float(obs['value'])
        maxObsDate = date if date > maxObsDate else maxObsDate
        minObsDate = date if date < minObsDate else minObsDate
        log.info('\t\tWriting Historical Data Point at %s for %s [value = %f]',
                 obs['date'], dataSeriesTicker, value)
        (success, error) = EM_DBHelp.insertDataPoint_DataHistoryTable(
                                db_connection, db_cursor, dataSeriesID,
                                date, value, isInterpolated=False)
        if not success:
            log.warning('\t\tFailed to Write Historical Data Point at %s for %s [value = %f]',
                        obs['date'], dataSeriesTicker, value)
            unsuccessfulInserts += 1
        else:
            successfulInserts += 1

    log.info('\t%d Historical Data Points Written Successfully for %s',
             successfulInserts, dataSeriesTicker)
    log.info('\t%d Historical Data Points Failed to Write for %s',
             unsuccessfulInserts, dataSeriesTicker)
    log.info('\tDate range from %s to %s Written for %s',
             EM_util.dtConvert_EpochtoY_M_D(minObsDate),
             EM_util.dtConvert_EpochtoY_M_D(maxObsDate),
             dataSeriesTicker)

    # Update Unsuccessful Inserts Count
    columnName = 'int_unsuccessful_inserts'
    value = unsuccessfulInserts
    sendToDB()
    # Update Earliest Data Point Found
    columnName = 'dt_earliest_value'
    value = minObsDate
    sendToDB()
    # Update Latest Data Point Found
    columnName = 'dt_latest_value'
    value = maxObsDate
    sendToDB()
    # Update Last Update Date
    columnName = 'dt_last_updated_history'
    value = EM_util.dtGetNowAsEpoch()
    sendToDB()
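# Illustrative sketch (not part of the original module): the columnName/value +
# sendToDB() pattern above works because the lambda resolves the enclosing
# function's locals at call time, not at definition time. All names below are
# hypothetical.
def _sketch_lateBindingClosure():
    send = lambda: (columnName, value)  # reads enclosing locals when called
    columnName, value = 'dt_earliest_value', 1356998400
    assert send() == ('dt_earliest_value', 1356998400)
    columnName, value = 'dt_latest_value', 1420070400
    assert send() == ('dt_latest_value', 1420070400)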
def convertDataHistoryToJSON(ticker, dataSeriesID=None):
    global file_format
    if dataSeriesID is None:
        dataSeriesID = EM_DBHelp.retrieve_DataSeriesID(
                            db_conn, db_curs, dataTicker=ticker, insertIfNot=False)
    dataSeries = EM_DBHelp.getCompleteDataHistory_DataHistoryTable(
                            db_conn, db_curs, dataSeriesID)
    if file_format[0] == 'rs':
        convertValue = lambda (dt, vl): {'x': dt, 'y': float(vl)}
    elif file_format[0] == 'd3':
        convertValue = lambda (dt, vl): {'date': dtConvert_EpochtoY_M_D(dt),
                                         'value': float(vl)}
    else:
        raise Exception('File Format Type not recognized for date/value JSON')
    return map(convertValue, dataSeries)
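# Illustrative usage sketch for convertDataHistoryToJSON; it assumes db_conn
# and db_curs are already connected and that a 'GDP' ticker exists in the
# series table (both assumptions, not shown in this module).
def _sketch_convertUsage():
    global file_format
    file_format = ['d3']
    # Expected shape: [{'date': '2013-01-01', 'value': 16475.4}, ...]
    return convertDataHistoryToJSON('GDP')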
def writeDataHistorytoJSON(ticker):
    dataSeriesID = EM_DBHelp.retrieve_DataSeriesID(
                        db_conn, db_curs, dataTicker=ticker, insertIfNot=False)
    data_JSON = convertDataHistoryToJSON(ticker, dataSeriesID=dataSeriesID)
    # NOTE: unlike the sendToDB lambdas elsewhere, EM_DGUtil.getFromDB cannot
    # see this function's locals; it is assumed to pick up columnName through
    # its own module state
    columnName = 'dt_earliest_value'
    earliestData = EM_DGUtil.getFromDB()
    columnName = 'dt_latest_value'
    latestData = EM_DGUtil.getFromDB()
    columnName = 'dt_last_updated_history'
    lastUpdate = EM_DGUtil.getFromDB()
    writeFile = generateDateHistoryFilename(ticker, earliestData, latestData, lastUpdate)
    updateAvailableSeriesDataFile(ticker, earliestData, latestData, lastUpdate,
                                  dataFileName=writeFile)
    # 'with' guarantees the handle is closed even if json_dump raises
    with open(writeFile, 'wb') as writer:
        writer.write(json_dump(data_JSON))
def downloadDataFromFred(
        csvFileName=EM_util.FREDSeriesCSV, fillHistory=True, fillMetadata=True,
        fillUserData=True, minImportance=2, writeEveryRow=True, pause=False):
    # Access Global Variables
    global db_cursor
    global db_connection

    # Establish Helpful Lambdas
    sendToDB = lambda: \
        EM_DBHelp.update_DataSeriesMetaData(db_connection, db_cursor,
                                            columnName, value, seriesID=dataSeriesID)

    # Read CSV file
    log.info('Accessing CSV to get Series Tickers...')
    with open(csvFileName, 'rU') as csvfile:
        series_csv = csv.reader(csvfile)
        header = [next(series_csv)]  # Set aside the CSV header row
        write_list = [list(row) for row in series_csv]

    log.info('Downloading FRED data to database...')
    for i in range(len(write_list)):
        # Recognize end of file without reaching deprecated rows
        if write_list[i][FREDCol['TICKER_COL']] == '':
            break
        if int(write_list[i][FREDCol['IMPORTANCE_COL']]) > minImportance:
            continue

        lastHistoryDownload = datetime.datetime.strptime(
            write_list[i][FREDCol['HISTORY_TIMESTAMP_COL']],
            EM_util.FREDSeriesCSVTimeFormat)
        dnldHistory = ((fillHistory and
                        (datetime.datetime.now() - lastHistoryDownload) > EM_util.FREDDownloadDelayHistory) or
                       write_list[i][FREDCol['FORCE_HISTORY_REDOWNLOAD_COL']] == EM_util.FREDForceDownload) and \
                      write_list[i][FREDCol['FORCE_HISTORY_REDOWNLOAD_COL']] != EM_util.FREDSkipDownload

        lastMetadataDownload = datetime.datetime.strptime(
            write_list[i][FREDCol['METADATA_TIMESTAMP_COL']],
            EM_util.FREDSeriesCSVTimeFormat)
        dnldMetadata = ((fillMetadata and
                         (datetime.datetime.now() - lastMetadataDownload) > EM_util.FREDDownloadDelayMetadata) or
                        write_list[i][FREDCol['FORCE_METADATA_REDOWNLOAD_COL']] == EM_util.FREDForceDownload) and \
                       write_list[i][FREDCol['FORCE_METADATA_REDOWNLOAD_COL']] != EM_util.FREDSkipDownload

        ticker = write_list[i][FREDCol['TICKER_COL']]
        log.info('Downloading %s data to database...', ticker)

        if dnldHistory:
            status = downloadDataFromFred_fillHistory(ticker)
            write_list[i][FREDCol['HISTORY_STATUS_COL']] = status
            write_list[i][FREDCol['HISTORY_TIMESTAMP_COL']] = \
                datetime.datetime.now().strftime(EM_util.FREDSeriesCSVTimeFormat)

        if dnldMetadata:
            status = downloadDataFromFred_fillMetadata(ticker)
            write_list[i][FREDCol['METADATA_STATUS_COL']] = status
            write_list[i][FREDCol['METADATA_TIMESTAMP_COL']] = \
                datetime.datetime.now().strftime(EM_util.FREDSeriesCSVTimeFormat)

        # TODO: Decide when to fillUserData
        if fillUserData:
            # Look up the Series ID again to confirm it was inserted correctly
            # earlier (rather than just passing it through from above)
            dataSeriesID = EM_DBHelp.retrieve_DataSeriesID(db_connection, db_cursor,
                                dataTicker=ticker, insertIfNot=False)

            columnName = 'code_data_subtype'
            csvVal = write_list[i][FREDCol['SUBTYPE_COL']]
            if csvVal == '':
                csvVal = 'unknown'
            value = int(EM_util.dataSubtypes[csvVal])
            sendToDB()

            columnName = 'code_data_type'
            csvVal = write_list[i][FREDCol['TYPE_COL']]
            if csvVal == '':
                csvVal = 'unknown'
            value = int(EM_util.dataTypes[csvVal])
            sendToDB()

            columnName = 'code_data_supertype'
            csvVal = write_list[i][FREDCol['SUPERTYPE_COL']]
            if csvVal == '':
                csvVal = 'unknown'
            value = int(EM_util.dataSupertypes[csvVal])
            sendToDB()

            columnName = 'code_is_level'
            csvVal = write_list[i][FREDCol['IS_LEVEL_COL']]
            if csvVal == '':
                csvVal = 'unknown'
            value = int(EM_util.levelMattersTypes[csvVal])
            sendToDB()

            columnName = 'code_good_direction'
            csvVal = write_list[i][FREDCol['POSITIVE_IS_GOOD_COL']]
            if csvVal == '':
                csvVal = 'unknown'
            value = int(EM_util.goodDirectionTypes[csvVal])
            sendToDB()

        # TODO: This is hacky. Do better.
        if writeEveryRow:
            with open(csvFileName[:-4] + '_temp.csv', 'wb') as milestone_csvfile:
                log.info('Updating Series CSV File...')
                csvwrite = csv.writer(milestone_csvfile)
                csvwrite.writerows(header)
                csvwrite.writerows(write_list)
                log.info('CSV file updated...')

        # For Testing... TODO: Remove
        if pause:
            raw_input('\nPress Enter to Continue...\n')

    log.info('Downloaded Series data successfully...')
    with open(csvFileName, 'wb') as csvfile:
        log.info('Updating Series CSV File...')
        csvwrite = csv.writer(csvfile)
        csvwrite.writerows(header)
        csvwrite.writerows(write_list)
        log.info('CSV file updated...')
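# A minimal driver sketch showing how downloadDataFromFred might be invoked;
# it assumes db_connection and db_cursor have already been initialized
# elsewhere in the module (an assumption, since the setup is not shown here).
def _sketch_downloadDriver():
    # Restrict to the most important series (importance <= 1), refresh both
    # history and metadata, and checkpoint the CSV after every row.
    downloadDataFromFred(fillHistory=True, fillMetadata=True,
                         fillUserData=False, minImportance=1,
                         writeEveryRow=True, pause=False)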
def downloadDataFromFred_fillMetadata_parseSeriesData(dataSeriesTicker, data):
    # Get Data Series ID
    dataSeriesID = EM_DBHelp.retrieve_DataSeriesID(db_connection, db_cursor,
                        dataTicker=dataSeriesTicker, insertIfNot=True)
    sendToDB = lambda: \
        EM_DBHelp.update_DataSeriesMetaData(db_connection, db_cursor,
                                            columnName, value, seriesID=dataSeriesID)

    # Update Data Series Name
    columnName = 'txt_data_name'
    value = data['title'].encode('ascii', 'ignore')
    sendToDB()

    # Update Data Series Source
    columnName = 'txt_data_source'
    value = 'FRED'
    sendToDB()

    # Update Data Series Periodicity
    dataFrequency = data['frequency_short']
    if dataFrequency == 'M':
        value = 12
    elif dataFrequency == 'Q':
        value = 4
    elif dataFrequency == 'A':
        value = 1
    elif dataFrequency == 'W':
        value = 52
    elif dataFrequency == 'D':
        value = 365
    else:
        raise NotImplementedError('Data Frequency not recognized')
    columnName = 'int_data_periodicity'
    sendToDB()

    # Update Data Series Seasonal Adjustment
    dataSeasonalAdjustment = data['seasonal_adjustment_short']
    if dataSeasonalAdjustment == 'SA' or dataSeasonalAdjustment == 'SAAR':
        value = 1
    elif dataSeasonalAdjustment == 'NSA':
        value = 0
    else:
        raise NotImplementedError('Data Seasonal Adjustment Code not recognized')
    columnName = 'bool_is_seasonally_adjusted'
    sendToDB()

    # Update Data Series Last Updated
    value = EM_util.dtConvert_YYYY_MM_DD_TimetoEpoch(data['last_updated'])
    columnName = 'dt_last_updated_SOURCE'
    sendToDB()

    # Update Data Series First Value
    value = EM_util.dtConvert_YYYY_MM_DDtoEpoch(data['observation_start'])
    columnName = 'dt_earliest_value_SOURCE'
    sendToDB()

    # Update Data Series Last Value
    value = EM_util.dtConvert_YYYY_MM_DDtoEpoch(data['observation_end'])
    columnName = 'dt_latest_value_SOURCE'
    sendToDB()

    # Fill Generated Flag (Always False)
    value = 0
    columnName = 'bool_generated_datapoint'
    sendToDB()

    # Update Last Update Date
    columnName = 'dt_last_updated_metadata'
    value = EM_util.dtGetNowAsEpoch()
    sendToDB()

    # Update Information From Units
    # TEST! TEST! TEST! -- below this point sendToDB is stubbed out so the
    # unit-parsing logic does NOT actually update the DB while under test

    # Save Off Data Series Real/Nominal Flag for Later Use
    dataSeriesIsRealValue = None

    def sendToDB():  # TEST stub: print instead of writing to the DB
        print data['title'] + '|' + data['units'] + '|' + columnName + ' : ' + str(value)

    dataUnits = data['units']
    done = 0
    if dataUnits == 'Percent':
        dataUnitsValue = 'Percent'
        dataTypeValue = 'Percent'
        # Consider: True for something like an unemployment rate, False for a bond yield
        dataSeriesIsRealValue = False
        done = 1
    elif dataUnits == 'Number':
        dataUnitsValue = 'Number'
        dataTypeValue = 'Number'
        dataSeriesIsRealValue = False  # Consider: True?
        done = 1
    elif dataUnits == 'Index':
        dataUnitsValue = 'Number'
        dataTypeValue = 'Index'
        dataSeriesIsRealValue = False
        done = 1

    matchObj = None if done else re.match(of_matchObj, dataUnits)
    if matchObj is not None:
        dataUnitsValue = matchObj.group(1)
        dataTypeValue = matchObj.group(2)
        innerMatchObj = re.match(ChainedDollars_matchObj, dataTypeValue)
        if innerMatchObj is not None:
            dataSeriesIsRealValue = True
            dataTypeValue = 'Dollars'
        else:
            dataSeriesIsRealValue = False
        done = 1

    matchObj = None if done else re.match(per_matchObj, dataUnits)
    if matchObj is not None:
        dataUnitsValue = 'per ' + matchObj.group(2)
        dataTypeValue = matchObj.group(1)
        done = 1

    matchObj = None if done else re.match(IndexEquals_matchObj, dataUnits)
    if matchObj is not None:
        dataUnitsValue = 'Number'
        dataTypeValue = 'Index'
        done = 1

    if dataSeriesIsRealValue is None:
        pass
        # raise NotImplementedError
        # dataSeriesIsRealValue =
        # TODO: regex: read the data series name to determine (if it has the word 'real')

    columnName = 'bool_is_real'
    value = dataSeriesIsRealValue
    sendToDB()

    columnName = 'bool_is_normalized'  # TODO: fill
    value = None
    sendToDB()

    columnName = 'code_private_public'  # TODO: fill
    value = None
    sendToDB()

    columnName = 'code_economic_activity'  # TODO: fill
    value = None
    sendToDB()

    columnName = 'code_data_adjustment'  # TODO: fill
    value = None
    sendToDB()

    columnName = 'code_sector'  # TODO: fill
    value = None
    sendToDB()

    columnName = 'code_data_units'  # TODO: fill
    value = None
    sendToDB()

    columnName = 'code_item_type'  # TODO: fill
    value = None
    sendToDB()

    columnName = 'txt_data_original_source'  # TODO: fill
    value = None
    sendToDB()
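# A self-contained sketch of what the unit-parsing patterns referenced above
# (of_matchObj, per_matchObj, IndexEquals_matchObj, ChainedDollars_matchObj)
# might look like; the patterns and example strings below are assumptions for
# illustration only, not the module's actual definitions.
def _sketch_unitPatterns():
    _of = r'(.+?) of (.+)'            # e.g. 'Billions of Chained 2009 Dollars'
    _per = r'(.+?) per (.+)'          # e.g. 'Dollars per Hour'
    _indexEquals = r'Index .+=.+'     # e.g. 'Index 2009=100'
    _chained = r'Chained .* Dollars'  # chained dollars mark a real (inflation-adjusted) series
    m = re.match(_of, 'Billions of Chained 2009 Dollars')
    assert m.group(1) == 'Billions'
    assert re.match(_chained, m.group(2)) is not None
    assert re.match(_per, 'Dollars per Hour').group(1) == 'Dollars'
    assert re.match(_indexEquals, 'Index 2009=100') is not None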