def select_pred_words_effectiveness(self, numWords=None):
    """Choose predictive words, drawing data series through ``weighted_choice``.

    Each attempt draws a data-series id with ``weighted_choice`` over
    ``self._pred_data_eff_array["prob"]`` (with ``self._total_prob`` passed
    as the second argument), resolves it to a ticker, pairs it with a
    randomly chosen transformation pattern and random parameters, and keeps
    the resulting word only when it is not a response word, not already
    chosen, and its date range overlaps the window shared by the response
    words and every word accepted so far.

    Args:
        numWords: Number of words to aim for. When None, the target is
            ``geometric(PRED_COUNT_GEOMETRIC_PARAM) + PRED_COUNT_FLOOR``.

    Returns:
        ``self.pred_words``, read after ``self._pred_words`` has been set to
        the chosen word handles. Fewer than ``numWords`` words may be chosen
        because the search gives up after ``numWords * 10`` attempts.
    """
    assert self.resp_words is not None
    # Add Predictive Words
    if self._pred_data_tickers is None:
        self.__add_pred_data_tickers()
        # NOTE(review): presumably the sorting array only needs building when
        # the tickers are first loaded -- confirm against the class.
        self.create_data_sorting_array()
    # Select How Many Words
    if numWords is None:
        numWords = geometric(PRED_COUNT_GEOMETRIC_PARAM) + PRED_COUNT_FLOOR
    # Select Words
    count = 0
    chosen = {}
    resp_word_names = [str(w) for w in self.resp_words]
    # The usable window is the intersection of all date ranges: it starts at
    # the latest min_date and ends at the earliest max_date.
    min_date = max([w.min_date for w in self.resp_words])
    max_date = min([w.max_date for w in self.resp_words])
    while (len(chosen) < numWords) and (count < numWords * 10):
        count += 1  # Avoid Infinite Loops
        # Select Words / Create Data Handle
        id_ = weighted_choice(self._pred_data_eff_array["prob"].iteritems(), self._total_prob)
        ticker = retrieve_DataSeriesTicker(self.hndl_DB.cursor, id_)
        hndl_Data = EMF_DataSeries_Handle(self.hndl_DB, ticker=ticker)
        # Select Words / Create Trans Handle
        hndl_Trns = EMF_Transformation_Handle(choice(self._pred_trns_ptrns))
        # Select Words / Create Trans Handle / Add Parameters to Trans Handle
        hndl_Trns.parameters = self.__create_random_trns_params_pred()
        # Select Words / Create Word Handle
        hndl_Word = EMF_WordSeries_Handle(self.hndl_DB, hndl_Data, hndl_Trns)
        # Select Words / Ensure Word Validity
        # Select Words / Ensure Word Validity / Make sure Word Isn't Response Word
        wordName = str(hndl_Word)
        if wordName in resp_word_names:
            continue
        # Select Words / Ensure Word Validity / Make sure Word Isn't Already Taken
        if wordName in chosen:
            continue
        # Select Words / Ensure Word Validity / Make sure Dates Don't Conflict
        min_challenger = hndl_Word.min_date
        if min_challenger >= max_date:
            continue
        # max_date is only read once the first check has passed.
        max_challenger = hndl_Word.max_date
        if max_challenger <= min_date:
            continue
        # Narrow the shared window to include the accepted word.
        min_date = max(min_date, min_challenger)
        max_date = min(max_date, max_challenger)
        # Select Words / Add Word to Set
        chosen[wordName] = hndl_Word
        log.info("WORDSELECT: Predictive Words: Chose {0}".format(wordName))  # TEST: Delete
    # The format string must sit on a single physical line; an unescaped line
    # break inside a single-quoted literal is a syntax error.
    log.info("WORDSELECT: Predictive Words: Chose {0}".format(chosen.keys()))
    self._pred_words = chosen.values()
    return self.pred_words
def select_pred_words_random(self, numWords=None):
    """Choose predictive words by picking tickers and transformations at random.

    Every attempt builds a candidate word from a ticker drawn with ``choice``
    from ``self._pred_data_tickers``, a pattern drawn with ``choice`` from
    ``self._pred_trns_ptrns`` and freshly generated transformation
    parameters. A candidate is kept only if its name is neither a response
    word nor already selected, and its date range overlaps the window shared
    by the response words and all words accepted so far.

    Args:
        numWords: Number of words to aim for. When None, the target is
            ``geometric(PRED_COUNT_GEOMETRIC_PARAM) + PRED_COUNT_FLOOR``.

    Returns:
        ``self.pred_words``, read after ``self._pred_words`` has been set to
        the selected word handles. The search stops after ``numWords * 10``
        attempts, so fewer words than requested may come back.
    """
    assert self.resp_words is not None

    # Load the candidate tickers on first use.
    if self._pred_data_tickers is None:
        self.__add_pred_data_tickers()

    # Settle on the target count.
    if numWords is None:
        numWords = geometric(PRED_COUNT_GEOMETRIC_PARAM) + PRED_COUNT_FLOOR
    log.info("WORDSELECT: Predictive Words: Choosing {0} words".format(numWords))

    selected = {}
    # TODO: this could be computed once and cached instead of on every call.
    taken_by_response = [str(word) for word in self.resp_words]
    # Shared window: latest start and earliest end across the response words.
    window_start = max([word.min_date for word in self.resp_words])
    window_end = min([word.max_date for word in self.resp_words])

    attempts = 0
    attempt_cap = numWords * 10  # hard bound so the search always terminates
    while len(selected) < numWords and attempts < attempt_cap:
        attempts += 1

        # Assemble the candidate: data series, then transformation, then word.
        data_handle = EMF_DataSeries_Handle(self.hndl_DB, ticker=choice(self._pred_data_tickers))
        trns_handle = EMF_Transformation_Handle(choice(self._pred_trns_ptrns))
        trns_handle.parameters = self.__create_random_trns_params_pred()
        candidate = EMF_WordSeries_Handle(self.hndl_DB, data_handle, trns_handle)

        # Reject response words and repeats.
        name = str(candidate)
        if name in taken_by_response or name in selected:
            continue

        # Reject candidates whose dates fall outside the shared window.
        candidate_start = candidate.min_date
        if candidate_start >= window_end:
            continue
        candidate_end = candidate.max_date
        if candidate_end <= window_start:
            continue

        # Accept: shrink the window and record the word.
        window_start = max(window_start, candidate_start)
        window_end = min(window_end, candidate_end)
        selected[name] = candidate

    log.info("WORDSELECT: Predictive Words: Chose {0}".format(selected.keys()))
    self._pred_words = selected.values()
    return self.pred_words
def test_Current_Level_Cat_pattern(data, hndl_Time):
    """Plot a series next to its 'Futr_Lvl_Cat' transformation (unfinished test).

    Args:
        data: Values fed to ``transform_data`` and stored on the untransformed
            test series.
        hndl_Time: Time handle; its dates are read before and after
            ``transform_time`` is applied to it.

    Raises:
        NotImplementedError: Always, once the plot call has returned; the
            test contains no assertions yet.
    """
    # Untransformed series, captured before the time handle is transformed.
    original_series = EMF_TestSeries_Handle()
    original_series.values = data
    original_series.dates = hndl_Time.get_dates()

    transformation = EMF_Transformation_Handle(
        'Futr_Lvl_Cat',
        trnsKwargs={NUM_RANGES: 10, FIRST_ORDER_DIFF_TIME: 10},
    )
    transformed_values = transformation.transform_data(data)
    transformation.transform_time(hndl_Time)

    # Transformed series, using the dates read after transform_time.
    transformed_series = EMF_TestSeries_Handle()
    transformed_series.values = transformed_values
    transformed_series.dates = hndl_Time.get_dates()

    utl_Tst.plot_data_series(original_series, transformed_series)
    raise NotImplementedError
def select_pred_words_random(self, numWords=None):
    """Choose predictive words at random for a single response word.

    Every attempt builds a candidate word from a randomly chosen predictive
    ticker, a randomly chosen pattern from ``self._pred_trns_ptrns`` and
    freshly generated transformation parameters. A candidate is kept only if
    it is not the response word and its date range overlaps the window
    shared by the response data and all words accepted so far.

    Args:
        numWords: Number of words to aim for. When None, the target is
            ``geometric(PRED_COUNT_GEOMETRIC_PARAM) + PRED_COUNT_FLOOR``.

    Returns:
        ``self.pred_words``, read after ``self._pred_words`` has been set to
        the chosen word handles. The search stops after ``numWords * 10``
        attempts, so fewer words than requested may come back.
    """
    assert self._resp_word is not None
    # Add Predictive Words
    if self._pred_data_tickers is None:
        self.__add_pred_data_tickers()
    # Select How Many Words
    if numWords is None:
        numWords = geometric(PRED_COUNT_GEOMETRIC_PARAM) + PRED_COUNT_FLOOR
    log.info('WORDSELECT: Predictive Words: Choosing {0} words'.format(numWords))
    # Select Words
    count = 0
    chosen = {}
    # The window [min_date, max_date] is an intersection of date ranges, so a
    # missing bound means "unbounded": the start falls back to the smallest
    # integer and the end to the largest. The fallbacks must be stored in
    # min_date / max_date themselves; otherwise the comparisons below would
    # still run against None.
    min_date = self.resp_data_min_date
    if min_date is None:
        min_date = -maxint - 1
    max_date = self.resp_data_max_date
    if max_date is None:
        max_date = maxint
    while (len(chosen) < numWords) and (count < numWords * 10):
        count += 1  # Avoid Infinite Loops
        # Select Words / Create Data Handle
        # Draw from the predictive ticker pool, which the guard above
        # guarantees is loaded.
        ticker = choice(self._pred_data_tickers)
        hndl_Data = EMF_DataSeries_Handle(self.hndl_DB, ticker=ticker)
        # Select Words / Create Trans Handle
        hndl_Trns = EMF_Transformation_Handle(choice(self._pred_trns_ptrns))
        # Select Words / Create Trans Handle / Add Parameters to Trans Handle
        hndl_Trns.parameters = self.__create_random_trns_params()
        # Select Words / Create Word Handle
        hndl_Word = EMF_WordSeries_Handle(self.hndl_DB, hndl_Data, hndl_Trns)
        # Select Words / Ensure Word Validity
        # Select Words / Ensure Word Validity / Make sure Word Isn't Response Word
        wordName = str(hndl_Word)
        if wordName == str(self._resp_word):
            continue
        # Select Words / Ensure Word Validity / Make sure Dates Don't Conflict
        min_challenger = hndl_Word.min_date
        if min_challenger >= max_date:
            continue
        max_challenger = hndl_Word.max_date
        if max_challenger <= min_date:
            continue
        # Narrow the shared window to include the accepted word.
        min_date = max(min_date, min_challenger)
        max_date = min(max_date, max_challenger)
        # Select Words / Add Word to Set
        # A repeated name simply overwrites its earlier entry.
        chosen[wordName] = hndl_Word
        log.info('WORDSELECT: Predictive Words: Chose {0}'.format(wordName))  # TEST: Delete
    log.info('WORDSELECT:Predictive Words: Chose {0}'.format(chosen.keys()))
    self._pred_words = chosen.values()
    return self.pred_words
def testTransformationReversal_Future_FoD(data, dt, hndl_Srs_Original):
    """Check the 'Futr_Change' transformation and its reversal with a lag of 10.

    Args:
        data: Values to transform; indexed and sliced along the first axis.
        dt: Dates matching ``data``; sliced along the first axis.
        hndl_Srs_Original: Series handle plotted against the reversed series.
    """
    transformation = EMF_Transformation_Handle(
        'Futr_Change', trnsKwargs={FIRST_ORDER_DIFF_TIME: 10}
    )

    # Forward direction.
    forward_dates = transformation.transform_time(dt)
    forward_values = transformation.transform_data(data)
    forward_series = EMF_TestSeries_Handle()
    forward_series.set_series_dates(forward_dates)
    forward_series.set_series_values(forward_values)
    # The forward series is built but deliberately not plotted.
    assert np.all(forward_dates == dt[:-10])
    assert forward_values[0] == data[10] - data[0]

    # Reverse direction: rebuild from the leading values plus the deltas.
    reversed_dates = transformation.reverse_transform_time(forward_dates)
    reversed_values = transformation.reverse_transform_data(
        data[:-10], predictionDelta=forward_values
    )
    reversed_series = EMF_TestSeries_Handle()
    reversed_series.set_series_dates(reversed_dates)
    reversed_series.set_series_values(reversed_values)
    utl_Tst.plot_data_series(hndl_Srs_Original, reversed_series)
    assert np.all(reversed_dates == dt[10:])
def testTransformationReversal_Past_Lvl(data, dt, hndl_Srs_Original):
    """Check the 'Past_Lvl' transformation and its reversal with 'PeriodDiff' 10.

    Args:
        data: Values to transform; sliced along the first axis.
        dt: Dates matching ``data``; sliced along the first axis.
        hndl_Srs_Original: Series handle plotted against the reversed series.
    """
    transformation = EMF_Transformation_Handle(
        'Past_Lvl', trnsKwargs={'PeriodDiff': 10}
    )

    # Forward direction.
    forward_dates = transformation.transform_time(dt)
    forward_values = transformation.transform_data(data)
    forward_series = EMF_TestSeries_Handle()
    forward_series.set_series_dates(forward_dates)
    forward_series.set_series_values(forward_values)
    # The forward series is built but deliberately not plotted.
    assert np.all(forward_dates == dt[10:])

    # Reverse direction.
    reversed_dates = transformation.reverse_transform_time(forward_dates)
    reversed_values = transformation.reverse_transform_data(forward_values)
    reversed_series = EMF_TestSeries_Handle()
    reversed_series.set_series_dates(reversed_dates)
    reversed_series.set_series_values(reversed_values)
    utl_Tst.plot_data_series(hndl_Srs_Original, reversed_series)
    assert np.all(reversed_dates == dt[:-10])
    assert np.all(reversed_values == data[:-10])
def select_pred_words_all_tickers(self, trns_ptrn=None, trns_kwargs=None, trns_rndm=False):
    """Build one predictive word per predictive ticker, all sharing one transformation.

    ``self._pred_words`` is reset to an empty list and then filled with one
    word handle for every ticker in ``self._pred_data_tickers``.

    Args:
        trns_ptrn: Transformation pattern to apply. When None, the pattern is
            either drawn at random or defaults to ``'None'`` (see
            ``trns_rndm``).
        trns_kwargs: Passed as ``kwargs_list`` when generating the
            transformation parameters. Defaults to ``self.pred_trns_kwargs``.
        trns_rndm: Only consulted when ``trns_ptrn`` is None. If true, a
            pattern is drawn with ``choice`` from ``self._pred_trns_ptrns``;
            otherwise ``'None'`` is used.

    Returns:
        None. The result is stored on ``self._pred_words``.
    """
    self._pred_words = []
    log.info('WORDSELECT: Predictive Words: Choosing All Data Tickers')
    if trns_ptrn is None:
        if trns_rndm:
            # The random draw must pick a transformation pattern (not a
            # ticker); otherwise trns_ptrn would still be None below.
            trns_ptrn = choice(self._pred_trns_ptrns)
        else:
            trns_ptrn = 'None'
    if self._pred_data_tickers is None:
        self.__add_pred_data_tickers()
    hndl_Trns = EMF_Transformation_Handle(trns_ptrn)
    if trns_kwargs is None:
        trns_kwargs = self.pred_trns_kwargs
    hndl_Trns.parameters = self.__create_random_trns_params(kwargs_list=trns_kwargs)
    log.info('WORDSELECT: Predictive Words: Chose {0} Transformation'.format(hndl_Trns))
    # The same transformation handle is shared by every word.
    for ticker in self._pred_data_tickers:
        hndl_Data = EMF_DataSeries_Handle(self.hndl_DB, ticker=ticker)
        hndl_Word = EMF_WordSeries_Handle(self.hndl_DB, hndl_Data, hndl_Trns)
        self._pred_words.append(hndl_Word)
def test_None_pattern(data, hndl_Time):
    """Check that the 'None' transformation leaves data and dates unchanged.

    The expected dates come from ``dt``, which is not a parameter and must
    be resolvable from the enclosing scope.

    Args:
        data: Values to transform and compare against.
        hndl_Time: Time handle passed to the forward and reverse time
            transforms.
    """
    identity = EMF_Transformation_Handle('None')

    # Forward pass.
    forward = identity.transform_data(data)
    identity.transform_time(hndl_Time)
    assert np.all(forward == data)
    assert np.all(hndl_Time.get_dates() == dt)

    # Reverse pass.
    restored = identity.reverse_transform_data(None, modifier=data)
    identity.reverse_transform_time(hndl_Time)
    assert np.all(restored == data)
    assert np.all(hndl_Time.get_dates() == dt)
def test_Past_Level_pattern(data, hndl_Time):
    """Check the 'Past_Lvl' transformation and its reversal with PERIODS_AWAY = 10.

    The expected dates come from ``Futr_Shift_dt`` and ``dt``, which are not
    parameters and must be resolvable from the enclosing scope.

    Args:
        data: Values to transform and compare against.
        hndl_Time: Time handle passed to the forward and reverse time
            transforms.
    """
    past_level = EMF_Transformation_Handle(
        'Past_Lvl', trnsKwargs={PERIODS_AWAY: 10}
    )

    # Forward pass.
    forward = past_level.transform_data(data)
    past_level.transform_time(hndl_Time)
    assert np.all(forward == data)
    assert np.all(hndl_Time.get_dates() == Futr_Shift_dt)

    # Reverse pass, fed with the forward result.
    restored = past_level.reverse_transform_data(None, modifier=forward)
    past_level.reverse_transform_time(hndl_Time)
    assert np.all(restored == data)
    assert np.all(hndl_Time.get_dates() == dt)
def test_Futr_Change_pattern(data, hndl_Time):
    """Check the 'Futr_Change' transformation and its reversal with a lag of 10.

    The expected values come from ``FoD_Constant``, ``Futr_Truncate_dt`` and
    ``dt``, which are not parameters and must be resolvable from the
    enclosing scope.

    Args:
        data: Values to transform; sliced along the first axis.
        hndl_Time: Time handle passed to the forward and reverse time
            transforms.
    """
    future_change = EMF_Transformation_Handle(
        'Futr_Change', trnsKwargs={FIRST_ORDER_DIFF_TIME: 10}
    )

    # Forward pass.
    deltas = future_change.transform_data(data)
    future_change.transform_time(hndl_Time)
    assert np.all(deltas == FoD_Constant)
    assert np.all(hndl_Time.get_dates() == Futr_Truncate_dt)

    # Reverse pass: leading values plus the deltas give the trailing values.
    restored = future_change.reverse_transform_data(data[:-10], modifier=deltas)
    future_change.reverse_transform_time(hndl_Time)
    assert np.all(restored == data[10:])
    assert np.all(hndl_Time.get_dates() == dt[10:])
def select_resp_words_all_permutations(self):
    """Build a response word for every ticker / transformation combination.

    One transformation handle is created per (pattern, parameter name,
    parameter value) triple taken from ``self.resp_trns_ptrns`` and
    ``self.resp_trns_kwargs``; handles with the same string form are
    collapsed into one. Each response ticker is then saved locally and
    paired with every remaining transformation.

    Returns:
        None. The words are stored on ``self._resp_words``.
    """
    log.info('WORDSELECT: Response Words: Choosing All Data Tickers')
    if self._resp_data_tickers is None:
        self.__add_resp_data_tickers()

    log.info('WORDSELECT: Response Words: Choosing All Transformations')
    # Keyed by string form so that duplicate transformations are dropped.
    unique_trns = {}
    for pattern in self.resp_trns_ptrns:
        for (param_name, param_values) in self.resp_trns_kwargs.iteritems():
            for param_value in param_values:
                trns_handle = EMF_Transformation_Handle(pattern)
                trns_handle.set_extra_parameter(param_name, param_value)
                unique_trns[str(trns_handle)] = trns_handle
    trns_handles = unique_trns.values()
    log.info('WORDSELECT: Response Words: Created {0} Transformations'.format(len(trns_handles)))

    self._resp_words = []
    for ticker in self.resp_data_tickers:
        data_handle = EMF_DataSeries_Handle(self.hndl_DB, ticker=ticker)
        data_handle.save_series_local()
        for trns_handle in trns_handles:
            self._resp_words.append(
                EMF_WordSeries_Handle(self.hndl_DB, data_handle, trns_handle)
            )
    # The list started empty, so its length is the number of words created.
    log.info('WORDSELECT: Response Words: Created {0} Response Words'.format(len(self._resp_words)))
def testTransformationHandle():
    """Check output shape, transformed dates and string form of three patterns.

    Uses a seeded 200x1 array of random integers and the dates 0..199 to
    exercise the 'None', 'Past_Lvl' and 'Futr_Change' patterns.
    """
    np.random.seed(10)
    n = 200
    data = np.random.randint(100, size=(n, 1))
    dt = np.arange(n)

    # 'None': data passes through untouched.
    raw = EMF_Transformation_Handle('None')
    assert np.all(raw.transform_data(data) == data)
    assert str(raw) == 'raw'

    # 'Past_Lvl' with a 10-period shift: the first 10 rows and dates are dropped.
    past = EMF_Transformation_Handle('Past_Lvl')
    past.set_extra_parameter(PERIODS_AWAY, 10)
    assert past.transform_data(data).shape == (190, 1)
    assert np.all(past.transform_time(dt) == np.arange(10, n))
    assert str(past) == 'PastLvl.10'

    # 'Futr_Change' with a 20-period lag: the last 20 rows and dates are dropped.
    future = EMF_Transformation_Handle('Futr_Change')
    future.set_extra_parameter(FIRST_ORDER_DIFF_TIME, 20)
    assert future.transform_data(data).shape == (180, 1)
    assert np.all(future.transform_time(dt) == np.arange(n - 20))
    assert str(future) == 'FutrDiff.20'
def testTransformationReversal_None(data, dt):
    """Check that the 'None' transformation maps dates to themselves both ways.

    Args:
        data: Unused by this test.
        dt: Dates run through the forward and reverse time transforms.
    """
    identity = EMF_Transformation_Handle('None')

    forward_dates = identity.transform_time(dt)
    assert np.all(forward_dates == dt)

    restored_dates = identity.reverse_transform_time(forward_dates)
    assert np.all(restored_dates == dt)