Example #1
 def insert_test_data(self, dates, values, tickers, periodicity=1, categorical=None):
     """Insert one data series per ticker into the database.

     Parameters:
         dates: sequence of date keys shared by all series.
         values: 2-D array with one column per ticker, or a 1-D array
             to be reused for every ticker.
         tickers: iterable of ticker strings; each becomes both the
             series name and ticker in the DB.
         periodicity: periodicity flag stored on each series (default 1).
         categorical: optional per-ticker sequence; when given,
             ``categorical[i]`` sets ``is_categorical`` on series i.
     """
     for (i, t) in enumerate(tickers):
         # BUG FIX: the original rebound ``values`` to the sliced column,
         # so after the first pass the 2-D array was gone and every
         # remaining ticker silently received column 0's data. Slice into
         # a local instead, leaving the parameter intact.
         try:
             col_values = values[:, i]
         except IndexError:
             # 1-D input: the same series is used for every ticker.
             col_values = values
         hndl_Data = EMF_DataSeries_Handle(self.hndl_DB, name=t, ticker=t, insertIfNot=True)
         if categorical is not None:
             hndl_Data.is_categorical = categorical[i]
         hndl_Data.periodicity = periodicity
         hndl_Data.save_series_db(dates, col_values)
Example #2
	def download_CSV_datasets(self):
		"""Download every Quandl dataset listed in the instruction CSV,
		store its history and metadata in the DB, and write per-row
		status columns back into the CSV.

		NOTE(review): Python 2 only — relies on ``dict.iteritems``,
		``except Exc, e`` syntax, and ``str.decode``; port before
		running under Python 3.
		"""
		# CSV columns copied verbatim into the DB as series metadata.
		MetadataCols = [	'geography',
							'category_1',
							'sub_category_1',
							'category_1_meaning',
							'IS_CATEGORICAL']
		MetadataColIdxs = [QuandlCSVColumns[x] for x in MetadataCols]
		# For Each Desired Dataset (Using CSV Reader)
		for row in self._hndl_CSV:
			# Download Quandl Data History and Metadata
			download_instr = dict([(name, row[idx]) for (name, idx) in zip(QuandlDnldCols, QuandlDnldColIdxs)])
			hndl_Qndl = self.__download_dataset_singleSeries(**download_instr)
			# For Each Desired Dataset / If No Error
			if hndl_Qndl.error is None:
				# For Each Desired Dataset / If No Error / Create Local Data Handle
				db_name = row[QuandlCSVColumns['db_name']]
				db_ticker = row[QuandlCSVColumns['db_ticker']]
				hndl_Data = EMF_DataSeries_Handle(self._hndl_DB, name=db_name, ticker=db_ticker, insertIfNot=True)
				# For Each Desired Dataset / If No Error / Read Input CSV Data
				metadata = dict([(name, row[idx]) for (name, idx) in zip(MetadataCols, MetadataColIdxs)])
				# For Each Desired Dataset / If No Error / Store Data History in DB
				hndl_Data.save_series_db(hndl_Qndl.dates, hndl_Qndl.values)
				# For Each Desired Dataset / If No Error / Store Metadata in DB
				self.__store_dataset_metadata(hndl_Data, hndl_Qndl, metadata)
				# For Each Desired Dataset / If No Error / Update CSV Variables
				quandl_metadata = {
					'Q_COLUMN_NAME' : hndl_Qndl.Data_Chosen_Column,
					'Q_REFRESHED_AT' : hndl_Qndl.Quandl_Latest_Refresh,
					'Q_EARLIEST_DATE' : hndl_Qndl.Quandl_Earliest_Date,
					'Q_LATEST_DATE' : hndl_Qndl.Quandl_Latest_Date,
					'Q_PERIODICITY' : hndl_Qndl.Quandl_Periodicity,
					'Q_DESCRIPTION' : hndl_Qndl.Quandl_Description,
					'Q_NAME' : hndl_Qndl.Data_Chosen_Column,
					'NUM_COLUMNS' : hndl_Qndl.Data_Num_Columns,
					'NUM_POINTS' : hndl_Qndl.Data_Num_Points,
					'ERROR' : hndl_Qndl.error
				}
			else:
				# For Each Desired Dataset / If Error / Update CSV Variables
				# On failure only the error column is written back.
				quandl_metadata = {
					'ERROR' : hndl_Qndl.error
				}
			# For Each Desired Dataset / Write Results to CSV
			for (key, val) in quandl_metadata.iteritems():
				try:
					# Probe the round-trip first so a value that cannot be
					# UTF-8-decoded is skipped rather than corrupting the CSV.
					str(val).decode('utf-8') # To prevent writing bad values
					self._hndl_CSV.change_current_row(val, columnName=key)
				except UnicodeDecodeError, e:
					# Skip the cell but keep processing remaining columns/rows.
					log.warning('QUANDL: Unicode error writing to csv')
					log.warning('QUANDL: Row:{0} Col:{1}'.format(row, key))
				except UnicodeEncodeError, e:
					log.warning('QUANDL: Unicode error writing to csv')
					log.warning('QUANDL: Row:{0} Col:{1}'.format(row, key))
def testDataSeriesHandle(hndl_DB):
	"""Round-trip a random 200-point series through EMF_DataSeries_Handle
	and verify dates, values, date bounds, name, and ticker all survive
	the save/load cycle."""
	series_dates = np.arange(200).reshape((200,))
	series_values = np.random.randint(100, size=(200,)) / 2.0
	label = 'test1'
	handle = EMF_DataSeries_Handle(hndl_DB, name=label, ticker=label, insertIfNot=True)
	handle.save_series_db(series_dates, series_values)
	assert np.array_equal(handle.get_series_dates(), series_dates)
	assert np.array_equal(handle.get_series_values(), series_values)
	assert handle.min_date == 0
	assert handle.max_date == 199
	assert handle.name == label
	assert handle.ticker == label
	def select_resp_words_all_permutations(self):
		"""Build the response-word list: one word per (data ticker,
		transformation) pair, deduplicating transformations by their
		string representation.

		NOTE(review): each EMF_Transformation_Handle gets exactly ONE
		(kwarg, value) override — this enumerates single-parameter
		variants, not the cross-product of all kwargs the name might
		suggest. Confirm that is the intended behavior.

		NOTE(review): Python 2 only (``dict.iteritems``).
		"""
		log.info('WORDSELECT: Response Words: Choosing All Data Tickers')
		if self._resp_data_tickers is None:
			self.__add_resp_data_tickers()
		log.info('WORDSELECT: Response Words: Choosing All Transformations')
		# Keyed by str(handle) so equivalent transformations collapse to one.
		trns_list = {}
		for trns in self.resp_trns_ptrns:
			for (k, v_list) in self.resp_trns_kwargs.iteritems():
				for v in v_list:
					hndl_Trns = EMF_Transformation_Handle(trns)
					hndl_Trns.set_extra_parameter(k, v)
					trns_list[str(hndl_Trns)] = hndl_Trns
		# From here on only the deduplicated handles are needed.
		trns_list = trns_list.values()
		log.info('WORDSELECT: Response Words: Created {0} Transformations'.format(len(trns_list)))
		self._resp_words = []
		count = 0
		for ticker in self.resp_data_tickers:
			hndl_Data = EMF_DataSeries_Handle(self.hndl_DB, ticker=ticker)
			# Cache the series locally once per ticker before fan-out.
			hndl_Data.save_series_local()			
			for hndl_Trns in trns_list:
				hndl_Word = EMF_WordSeries_Handle(self.hndl_DB, hndl_Data, hndl_Trns)
				self._resp_words.append(hndl_Word)
				count += 1
		log.info('WORDSELECT: Response Words: Created {0} Response Words'.format(count))
	def insert_time_to_recession(self):
		"""Derive two series from the US recession indicator and store them:
		months until the next recession ('US_TimeUntilRec') and months since
		the last recession ('US_TimeSinceRec').

		Reads ticker 'US_Rec_Ind', applies the time-to / time-since
		transforms, trims the edge region where the measure is undefined
		(as reported by the ``find_*_recession_limits`` helpers), and saves
		each result with the source series' periodicity.
		"""
		# Get Data Read
		hndl_Data_Read = EMF_DataSeries_Handle(self.hndl_DB, ticker='US_Rec_Ind')
		hndl_Data_Read.save_series_local()
		data = hndl_Data_Read.get_series_values()
		dates = hndl_Data_Read.get_series_dates()
		# Insert Time To Next Recession
		hndl_Data_Wrte = EMF_DataSeries_Handle(	self.hndl_DB, 
												name='Months to Next Recession', 
												ticker='US_TimeUntilRec',
												insertIfNot=True)
		trns_Data = transform_TimeToValue(data, {TIME_TO_VALUE: 1})[DATA_KEY]
		count = self.find_time_to_recession_limits(trns_Data)
		# BUG FIX: ``x[:-0]`` is an EMPTY slice, so a zero trim count used to
		# silently discard the entire series; only trim when count > 0.
		if count:
			trns_Data = trns_Data[:-count]
			trns_Dates = dates[:-count]
		else:
			trns_Dates = dates
		hndl_Data_Wrte.save_series_db(trns_Dates, trns_Data)
		hndl_Data_Wrte.periodicity = hndl_Data_Read.periodicity
		# Insert Time Since Last Recession
		hndl_Data_Wrte = EMF_DataSeries_Handle(	self.hndl_DB, 
												name='Months since Last Recession', 
												ticker='US_TimeSinceRec',
												insertIfNot=True)
		trns_Data = transform_TimeSinceValue(data, {TIME_SINCE_VALUE: 1})[DATA_KEY]
		count = self.find_time_since_recession_limits(trns_Data)
		# ``x[0:]`` is the whole series, so no zero-count guard is needed here.
		trns_Data = trns_Data[count:]
		trns_Dates = dates[count:]
		hndl_Data_Wrte.save_series_db(trns_Dates, trns_Data)
		hndl_Data_Wrte.periodicity = hndl_Data_Read.periodicity