Ejemplo n.º 1
0
def downloadDataFromFred_fillHistory_parseSeriesData(dataSeriesTicker, data):
	dataSeriesID =	EM_DBHelp.retrieve_DataSeriesID(db_connection, db_cursor, 
													dataTicker=dataSeriesTicker, 
													insertIfNot=True)
	sendToDB = lambda: \
	EM_DBHelp.update_DataSeriesMetaData(db_connection, db_cursor, columnName, value, seriesID=dataSeriesID)

	successfulInserts = 0
	unsuccessfulInserts = 0
	minObsDate = maxint
	maxObsDate = -maxint-1
	# Update History
	for obs in data['observations']:
		if obs['value'] == '.':
			continue # Avoid Unfilled FRED Values
		date = EM_util.dtConvert_YYYY_MM_DDtoEpoch(obs['date'])
		value = float(obs['value'])
		maxObsDate = date if date > maxObsDate else maxObsDate
		minObsDate = date if date < minObsDate else minObsDate
		log.info('\t\tWriting Historical Data Point at %s for %s [value = %f]', dataSeriesTicker, obs['date'], value)
		(success, error) = EM_DBHelp.insertDataPoint_DataHistoryTable(	
													db_connection, db_cursor,  
													dataSeriesID, 
													date, 
													value, 
													isInterpolated=False)
		if not success:
			log.warning('\t\tFailed to Write Historical Data Point at %s for %s [value = %f]', dataSeriesTicker, obs['date'], obs['value'])
			unsuccessfulInserts += 1
		else:
			successfulInserts +=1
	
	log.info('\t%d Historical Data Points Written Successfuly for %s', successfulInserts, dataSeriesTicker)
	log.info('\t%d Historical Data Points Written Failed for %s', unsuccessfulInserts, dataSeriesTicker)
	log.info('\tDate range from %s to %s Written for %s', EM_util.dtConvert_EpochtoY_M_D(minObsDate), EM_util.dtConvert_EpochtoY_M_D(maxObsDate), dataSeriesTicker)

	# Update Successful Inserts Flag
	columnName = 'int_unsuccessful_inserts'
	value = unsuccessfulInserts
	sendToDB()

	# Update Earliest DataPoint Found
	columnName = 'dt_earliest_value'
	value = minObsDate
	sendToDB()

	# Update Latest DataPoint Found
	columnName = 'dt_latest_value'
	value = maxObsDate
	sendToDB()

	# Update Last Update Date
	columnName = 'dt_last_updated_history'
	value = EM_util.dtGetNowAsEpoch()
	sendToDB()
Ejemplo n.º 2
0
def downloadDataFromFred(	csvFileName=EM_util.FREDSeriesCSV, 
							fillHistory=True, 
							fillMetadata=True, 
							fillUserData=True, 
							minImportance=2, 
							writeEveryRow=True,
							pause=False):
	# Access Global Variables
	global db_cursor
	global db_connection

	# Establish Helpful Lambdas
	sendToDB = lambda: \
	EM_DBHelp.update_DataSeriesMetaData(db_connection, db_cursor, columnName, value, seriesID=dataSeriesID)

	# Read CSV file
	log.info('Accessing CSV to get Series Tickers...')
	with open(csvFileName, 'rU') as csvfile:
		series_csv = csv.reader(csvfile)
		header = [next(series_csv)] # Ignore header in CSV
		write_list = [list(row) for row in series_csv]
		log.info('Downloading FRED data to database...')
		for i in range(len(write_list)):
			# Recognize End of File without Reaching Deprecated
			if write_list[i][FREDCol['TICKER_COL']] == '':
				break
			if int(write_list[i][FREDCol['IMPORTANCE_COL']]) > minImportance:
				continue
			lastHistoryDownload = 	datetime.datetime.strptime(
									write_list[i][FREDCol['HISTORY_TIMESTAMP_COL']],
									EM_util.FREDSeriesCSVTimeFormat)
			dnldHistory = 	((fillHistory and \
							(datetime.datetime.now()-lastHistoryDownload) > EM_util.FREDDownloadDelayHistory) or \
							write_list[i][FREDCol['FORCE_HISTORY_REDOWNLOAD_COL']] == EM_util.FREDForceDownload)  and \
							write_list[i][FREDCol['FORCE_HISTORY_REDOWNLOAD_COL']] != EM_util.FREDSkipDownload

			lastMetadataDownload = 	datetime.datetime.strptime(
									write_list[i][FREDCol['HISTORY_TIMESTAMP_COL']],
									EM_util.FREDSeriesCSVTimeFormat)
			dnldMetadata = 	((fillMetadata and\
							(datetime.datetime.now()-lastMetadataDownload) > EM_util.FREDDownloadDelayMetadata) or \
							write_list[i][FREDCol['FORCE_METADATA_REDOWNLOAD_COL']] == EM_util.FREDForceDownload)  and \
							write_list[i][FREDCol['FORCE_METADATA_REDOWNLOAD_COL']] != EM_util.FREDSkipDownload

			ticker = write_list[i][FREDCol['TICKER_COL']]
			log.info('Downloading %s data to database...', ticker)
			if dnldHistory:
				status = downloadDataFromFred_fillHistory(ticker)
				write_list[i][FREDCol['HISTORY_STATUS_COL']] = status
				write_list[i][FREDCol['HISTORY_TIMESTAMP_COL']] = \
					datetime.datetime.now().strftime(EM_util.FREDSeriesCSVTimeFormat)
			if dnldMetadata:
				status = downloadDataFromFred_fillMetadata(ticker)
				write_list[i][FREDCol['METADATA_STATUS_COL']] = status
				write_list[i][FREDCol['METADATA_TIMESTAMP_COL']] = \
					datetime.datetime.now().strftime(EM_util.FREDSeriesCSVTimeFormat)
			# TODO: Decide when to fillUserData
			if fillUserData:
				# We seek the Series ID again to make sure it was input correctly earlier
				# (Rather than just passing it back from earlier)	
				dataSeriesID = EM_DBHelp.retrieve_DataSeriesID(db_connection, db_cursor, 
																dataTicker=ticker, 
																insertIfNot=False)
				columnName = 'code_data_subtype'
				csvVal = write_list[i][FREDCol['SUBTYPE_COL']]
				if csvVal == '': csvVal = 'unknown'			
				value = int(EM_util.dataSubtypes[csvVal])
				sendToDB()
				
				columnName = 'code_data_type'
				csvVal = write_list[i][FREDCol['TYPE_COL']]
				if csvVal == '': csvVal = 'unknown'
				value = int(EM_util.dataTypes[csvVal])
				sendToDB()
				
				columnName = 'code_data_supertype'
				csvVal = write_list[i][FREDCol['SUPERTYPE_COL']]
				if csvVal == '': csvVal = 'unknown'
				value = int(EM_util.dataSupertypes[csvVal])
				sendToDB()

				columnName = 'code_is_level'
				csvVal = write_list[i][FREDCol['IS_LEVEL_COL']]
				if csvVal == '': csvVal = 'unknown'
				value = int(EM_util.levelMattersTypes[csvVal])
				sendToDB()

				columnName = 'code_good_direction'
				csvVal = write_list[i][FREDCol['POSITIVE_IS_GOOD_COL']]
				if csvVal == '': csvVal = 'unknown'
				value = int(EM_util.goodDirectionTypes[csvVal])
				sendToDB()
			# TODO: This is hacky. Do better.
			if writeEveryRow:
				with open(csvFileName[:-4]+'_temp.csv', 'wb') as milestone_csvfile:
					log.info('Updating Series CSV File...')
					csvwrite = csv.writer(milestone_csvfile)
					csvwrite.writerows(header)
					csvwrite.writerows(write_list)
					log.info('CSV file updated...')

			# For Testing... TODO: Remove
			if pause: 
				statement = raw_input('\nPress Enter to Continue...\n') 

		log.info('Downloaded Series data sucessfully...')

	with open(csvFileName, 'wb') as csvfile:
		log.info('Updating Series CSV File...')
		csvwrite = csv.writer(csvfile)
		csvwrite.writerows(header)
		csvwrite.writerows(write_list)
		log.info('CSV file updated...')
Ejemplo n.º 3
0
def downloadDataFromFred_fillMetadata_parseSeriesData(dataSeriesTicker, data):
	#Get Data Series ID
	dataSeriesID =	EM_DBHelp.retrieve_DataSeriesID(db_connection, db_cursor, 
													dataTicker=dataSeriesTicker, 
													insertIfNot=True)
	sendToDB = lambda: \
	EM_DBHelp.update_DataSeriesMetaData(db_connection, db_cursor, columnName, value, seriesID=dataSeriesID)

	# Update Data Series Name
	columnName = 'txt_data_name'
	value = data['title'].encode('ascii', 'ignore')
	sendToDB()

	columnName = 'txt_data_source'
	value = 'FRED'
	sendToDB()

	# Update Data Series Periodicity
	dataFrequency = data['frequency_short']
	if dataFrequency == 'M':
		value = 12
	elif dataFrequency == 'Q':
		value = 4
	elif dataFrequency == 'A':
		value = 1
	elif dataFrequency == 'W':
		value = 52		
	elif dataFrequency == 'D':
		value = 365
	else:
		raise NotImplementedError('Data Frequency not recognized')
	columnName = 'int_data_periodicity'
	sendToDB()

	# Update Data Series Seasonal Adjustment
	dataSeasonalAdjustment = data['seasonal_adjustment_short']
	if dataSeasonalAdjustment == 'SA' or dataSeasonalAdjustment == 'SAAR':
		value = 1
	elif dataSeasonalAdjustment == 'NSA':
		value = 0
	else:
		raise NotImplementedError('Data Seasonal Adjustment Code not recognized')
	columnName = 'bool_is_seasonally_adjusted'
	sendToDB()

	# Update Data Series Last Updated
	value = EM_util.dtConvert_YYYY_MM_DD_TimetoEpoch(data['last_updated'])
	columnName = 'dt_last_updated_SOURCE'
	sendToDB()

	# Update Data Series First Value
	value = EM_util.dtConvert_YYYY_MM_DDtoEpoch(data['observation_start'])
	columnName = 'dt_earliest_value_SOURCE'
	sendToDB()

	# Update Data Series Last Value
	value = EM_util.dtConvert_YYYY_MM_DDtoEpoch(data['observation_end'])
	columnName = 'dt_latest_value_SOURCE'
	sendToDB()
	
	# Fill Generated Flag (Always False)
	value = 0
	columnName = 'bool_generated_datapoint'
	sendToDB()

	# Update Last Update Date
	columnName = 'dt_last_updated_metadata'
	value = EM_util.dtGetNowAsEpoch()
	sendToDB()

	# Update Information From Units	
	# TEST! TEST! TEST! TEST! TEST! TEST! TEST! TEST!
	# TEST! TEST! TEST! TEST! TEST! TEST! TEST! TEST!
	# BELOW HERE WE CHANGE TO NOT ACTUALLY UPDATE DB.
	# TEST! TEST! TEST! TEST! TEST! TEST! TEST! TEST!
	# TEST! TEST! TEST! TEST! TEST! TEST! TEST! TEST!

	# Save Off Data Series Real/Nominal Flag for Later Use
	dataSeriesIsRealValue = None	
	def sendToDB(): print data['title'] + '|' + data['units'] + '|' + columnName + ' : ' + str(value) #TEST
	dataUnits = data['units']
	done = 0
	if dataUnits == 'Percent':
		dataUnitsValue = 'Percent'
		dataTypeValueValue = 'Percent'
		dataSeriesIsRealValue = False #Consider, true if something like unemployment rate, false if bond yield
		done = 1
	elif dataUnits == 'Number':
		dataUnitsValue = 'Number'
		dataTypeValue = 'Number'
		dataSeriesIsRealValue = False #Consider, true?
		done = 1
	elif dataUnits == 'Index':
		dataUnitsValue = 'Number'
		dataTypeValue = 'Index'
		dataSeriesIsRealValue = False
		done = 1
	matchObj = None if done else re.match(of_matchObj, dataUnits)
	if (matchObj is not None):
		dataUnitsValue = matchObj.group(1)
		dataTypeValue = matchObj.group(2)
		innerMatchObj = re.match(ChainedDollars_matchObj,dataTypeValue)
		if (innerMatchObj is not None):
			dataSeriesIsRealValue = True
			dataTypeValue = 'Dollars'
		else:
			dataSeriesIsRealValue = False
		done = 1
	matchObj = None if done else re.match(per_matchObj, dataUnits)
	if (matchObj is not None):
		dataUnitsValue = 'per ' + matchObj.group(2)
		dataTypeValue = matchObj.group(1)
		done = 1
	matchObj = None if done else re.match(IndexEquals_matchObj, dataUnits)
	if (matchObj is not None):
		dataUnitsValue = 'Number'
		dataTypeValue = 'Index'
		done = 1

	if dataSeriesIsRealValue is None:
		pass
		# raise NotImplementedError 
		# dataSeriesIsRealValue = #TODO: regex: Read data series name to determine (if it has the word 'real')
	columnName = 'bool_is_real' 
	value = dataSeriesIsRealValue
	sendToDB()
	columnName = 'bool_is_normalized' #TODO: fill
	value = None
	sendToDB()
	columnName = 'code_private_public' #TODO: fill
	value = None
	sendToDB()	
	columnName = 'code_economic_activity' #TODO: fill
	value = None
	sendToDB()
	columnName = 'code_data_adjustment' #TODO: fill
	value = None
	sendToDB()
	columnName = 'code_sector' #TODO: fill
	value = None
	sendToDB()	
	columnName = 'code_data_units' #TODO: fill
	value = None
	sendToDB()	
	columnName = 'code_item_type' #TODO: fill
	value = None
	sendToDB()
	columnName = 'txt_data_original_source' #TODO: fill
	value = None
	sendToDB()