def insert_test_data(self, dates, values, tickers, periodicity=1, categorical=None):
    """Insert one test data series per ticker into the DB.

    ``values`` may be 2-D (one column per ticker) or 1-D (shared by all
    tickers — the column lookup raises IndexError and the array is used
    as-is).

    BUGFIX: the original rebound ``values = values[:, i]`` inside the loop,
    so after the first iteration the 2-D array was gone and every later
    ticker silently received the first ticker's column.  A per-iteration
    local preserves the full input array.

    :param dates:       sequence of date keys, parallel to the value rows
    :param values:      1-D or 2-D array of series values
    :param tickers:     iterable of ticker names; also used as series names
    :param periodicity: periodicity code stored on each series (default 1)
    :param categorical: optional per-ticker flags; ``categorical[i]`` is
                        stored as ``is_categorical`` when provided
    """
    for i, t in enumerate(tickers):
        try:
            # Column for this ticker when values is 2-D
            series_values = values[:, i]
        except IndexError:
            # 1-D input: the same series is shared by every ticker
            series_values = values
        hndl_Data = EMF_DataSeries_Handle(self.hndl_DB, name=t, ticker=t, insertIfNot=True)
        if categorical is not None:
            hndl_Data.is_categorical = categorical[i]
        hndl_Data.periodicity = periodicity
        hndl_Data.save_series_db(dates, series_values)
def download_CSV_datasets(self):
    """Download each dataset listed in the input CSV from Quandl and persist it.

    For every CSV row: download the series; on success create/locate the
    local data handle, store the history and the row's metadata in the DB,
    and collect Quandl status fields; on failure collect only the error.
    Finally the collected fields are written back into the current CSV row.
    Values that cannot be decoded as UTF-8 are skipped with a warning so a
    single bad value cannot corrupt the CSV.
    """
    MetadataCols = ['geography', 'category_1', 'sub_category_1',
                    'category_1_meaning', 'IS_CATEGORICAL']
    MetadataColIdxs = [QuandlCSVColumns[x] for x in MetadataCols]
    # For Each Desired Dataset (Using CSV Reader)
    for row in self._hndl_CSV:
        # Download Quandl Data History and Metadata
        download_instr = dict([(name, row[idx])
                               for (name, idx) in zip(QuandlDnldCols, QuandlDnldColIdxs)])
        hndl_Qndl = self.__download_dataset_singleSeries(**download_instr)
        if hndl_Qndl.error is None:
            # Create Local Data Handle
            db_name = row[QuandlCSVColumns['db_name']]
            db_ticker = row[QuandlCSVColumns['db_ticker']]
            hndl_Data = EMF_DataSeries_Handle(self._hndl_DB, name=db_name,
                                              ticker=db_ticker, insertIfNot=True)
            # Read Input CSV Metadata
            metadata = dict([(name, row[idx])
                             for (name, idx) in zip(MetadataCols, MetadataColIdxs)])
            # Store Data History in DB
            hndl_Data.save_series_db(hndl_Qndl.dates, hndl_Qndl.values)
            # Store Metadata in DB
            self.__store_dataset_metadata(hndl_Data, hndl_Qndl, metadata)
            # Update CSV Variables
            quandl_metadata = {
                'Q_COLUMN_NAME': hndl_Qndl.Data_Chosen_Column,
                'Q_REFRESHED_AT': hndl_Qndl.Quandl_Latest_Refresh,
                'Q_EARLIEST_DATE': hndl_Qndl.Quandl_Earliest_Date,
                'Q_LATEST_DATE': hndl_Qndl.Quandl_Latest_Date,
                'Q_PERIODICITY': hndl_Qndl.Quandl_Periodicity,
                'Q_DESCRIPTION': hndl_Qndl.Quandl_Description,
                'Q_NAME': hndl_Qndl.Data_Chosen_Column,
                'NUM_COLUMNS': hndl_Qndl.Data_Num_Columns,
                'NUM_POINTS': hndl_Qndl.Data_Num_Points,
                'ERROR': hndl_Qndl.error
            }
        else:
            # Download failed: record only the error in the CSV
            quandl_metadata = {'ERROR': hndl_Qndl.error}
        # For Each Desired Dataset / Write Results to CSV
        for (key, val) in quandl_metadata.iteritems():
            try:
                str(val).decode('utf-8')  # To prevent writing bad values
                self._hndl_CSV.change_current_row(val, columnName=key)
            # Merged the two previously-duplicated handlers (identical bodies)
            except (UnicodeDecodeError, UnicodeEncodeError):
                log.warning('QUANDL: Unicode error writing to csv')
                log.warning('QUANDL: Row:{0} Col:{1}'.format(row, key))
def testDataSeriesHandle(hndl_DB):
    """Round-trip a synthetic series through EMF_DataSeries_Handle.

    Saves 200 points under a test ticker, then checks the stored dates,
    values, date bounds, name, and ticker all read back unchanged.
    """
    series_dates = np.reshape(np.arange(200), (200,))
    series_values = np.random.randint(100, size=(200,)) / 2.0
    label = 'test1'
    hndl = EMF_DataSeries_Handle(hndl_DB, name=label, ticker=label, insertIfNot=True)
    hndl.save_series_db(series_dates, series_values)
    # Stored series must read back exactly
    assert np.all(hndl.get_series_dates() == series_dates)
    assert np.all(hndl.get_series_values() == series_values)
    # Date bounds span the inserted range
    assert hndl.min_date == 0
    assert hndl.max_date == 199
    # Identity fields persisted
    assert hndl.name == label
    assert hndl.ticker == label
def select_resp_words_all_permutations(self):
    """Build the response-word list: every data ticker crossed with every
    single-parameter transformation variant.

    Transformation handles are de-duplicated by their string form before
    the cross product is taken.
    """
    log.info('WORDSELECT: Response Words: Choosing All Data Tickers')
    if self._resp_data_tickers is None:
        self.__add_resp_data_tickers()
    log.info('WORDSELECT: Response Words: Choosing All Transformations')
    # De-duplicate transformation handles, keyed by their string form
    unique_trns = {}
    for pattern in self.resp_trns_ptrns:
        for (param, param_values) in self.resp_trns_kwargs.iteritems():
            for param_value in param_values:
                handle = EMF_Transformation_Handle(pattern)
                handle.set_extra_parameter(param, param_value)
                unique_trns[str(handle)] = handle
    trns_handles = unique_trns.values()
    log.info('WORDSELECT: Response Words: Created {0} Transformations'.format(len(trns_handles)))
    # Cross every ticker with every transformation
    self._resp_words = []
    for ticker in self.resp_data_tickers:
        hndl_Data = EMF_DataSeries_Handle(self.hndl_DB, ticker=ticker)
        hndl_Data.save_series_local()
        for hndl_Trns in trns_handles:
            self._resp_words.append(
                EMF_WordSeries_Handle(self.hndl_DB, hndl_Data, hndl_Trns))
    count = len(self._resp_words)
    log.info('WORDSELECT: Response Words: Created {0} Response Words'.format(count))
def insert_time_to_recession(self):
    """Derive and store two series from the US recession indicator:
    months until the next recession and months since the last one.

    BUGFIX: the original trimmed the tail with ``[:-count]``; when the
    limit count is 0 that slice is ``[:0]`` — an EMPTY sequence — so the
    entire derived series was silently dropped.  The end index is now
    computed explicitly so ``count == 0`` keeps the full series.
    """
    # Source series: US recession indicator
    hndl_Data_Read = EMF_DataSeries_Handle(self.hndl_DB, ticker='US_Rec_Ind')
    hndl_Data_Read.save_series_local()
    data = hndl_Data_Read.get_series_values()
    dates = hndl_Data_Read.get_series_dates()
    # Insert Time To (trailing points past the last observed recession are
    # undefined, so trim `count` points from the end)
    hndl_Data_Wrte = EMF_DataSeries_Handle(
        self.hndl_DB,
        name='Months to Next Recession',
        ticker='US_TimeUntilRec',
        insertIfNot=True)
    trns_Data = transform_TimeToValue(data, {TIME_TO_VALUE: 1})[DATA_KEY]
    count = self.find_time_to_recession_limits(trns_Data)
    end = len(trns_Data) - count  # explicit end avoids the [:-0] == empty pitfall
    hndl_Data_Wrte.save_series_db(dates[:end], trns_Data[:end])
    hndl_Data_Wrte.periodicity = hndl_Data_Read.periodicity
    # Insert Time Since (leading points before the first observed recession
    # are undefined, so trim `count` points from the start; [count:] is
    # already safe when count == 0)
    hndl_Data_Wrte = EMF_DataSeries_Handle(
        self.hndl_DB,
        name='Months since Last Recession',
        ticker='US_TimeSinceRec',
        insertIfNot=True)
    trns_Data = transform_TimeSinceValue(data, {TIME_SINCE_VALUE: 1})[DATA_KEY]
    count = self.find_time_since_recession_limits(trns_Data)
    hndl_Data_Wrte.save_series_db(dates[count:], trns_Data[count:])
    hndl_Data_Wrte.periodicity = hndl_Data_Read.periodicity