def get_word_arrays(self, mode=TRAINING, bootstrap=False):
    """Build the predictor and response value arrays for ``mode``.

    Rows are restricted by the filter returned from
    ``self.get_model_filter(mode=mode)``. Each word contributes one column:
    the result of ``self.get_values_by_col_filtered(word_key(word), filter_,
    values_only=True)`` reshaped to ``(-1, 1)``. Columns are stacked
    horizontally in the order the words are stored.

    Args:
        mode: One of the module's mode constants. The response array is
            only built when ``mode == TRAINING``.
        bootstrap: When true, rows are resampled with replacement, drawing
            ``len(pred_array) * BOOTSTRAP_MULTIPLIER`` random row indices.
            The same indices are applied to both arrays so rows stay
            aligned. Requires at least one predictor word, because the row
            count is taken from the predictor array.

    Returns:
        A ``(pred_array, resp_array)`` tuple. ``resp_array`` is ``None``
        outside training mode; either element is ``None`` when there are
        no words to build it from.
    """
    filter_ = self.get_model_filter(mode=mode)

    def stack_columns(words):
        # One (n_rows, 1) column per word, in word order.
        columns = [
            self.get_values_by_col_filtered(
                word_key(hndl_Word), filter_, values_only=True
            ).reshape(-1, 1)
            for hndl_Word in words
        ]
        if not columns:
            return None
        if len(columns) == 1:
            # Hand back the single column itself rather than a stacked copy.
            return columns[0]
        return np.hstack(columns)

    # Create Predictor Variable Array
    pred_array = stack_columns(self._pred_words)

    # Create Response Variable Array (not needed unless training)
    resp_array = None
    if mode == TRAINING:
        resp_array = stack_columns(self._resp_words)

    if bootstrap:
        # Original author's note: "ATM, nothing about this is bootstrapped".
        len_ = len(pred_array)
        n = len_ * BOOTSTRAP_MULTIPLIER
        smp = np.floor(np.random.rand(n) * len_).astype(int)
        pred_array = pred_array[smp]
        # Outside training mode there is no response array; indexing None
        # here used to raise TypeError.
        if resp_array is not None:
            resp_array = resp_array[smp]

    return (pred_array, resp_array)
def get_model_filter(self, mode=TRAINING):
    """Return the combined not-null filter for the columns ``mode`` requires.

    Predictor columns (keyed with ``word_key``) are always required. The
    response words add further columns depending on the mode:

    * ``PREDICTION_INDEPENDENT``: none.
    * ``PREDICTION_DEPENDENT``: the ``raw_word_key`` columns.
    * ``TRAINING``: the ``word_key`` columns.

    Args:
        mode: One of the three mode constants listed above.

    Returns:
        The ``&``-combination of ``~self.col_array[key].isnull()`` over
        every required key (presumably a boolean row mask; confirm against
        the type of ``self.col_array``).

    Raises:
        NameError: If ``mode`` is not one of the recognised constants.
    """
    # Decide how response words are keyed for this mode (None = not used).
    if mode == PREDICTION_INDEPENDENT:
        resp_key = None
    elif mode == PREDICTION_DEPENDENT:
        resp_key = raw_word_key
    elif mode == TRAINING:
        resp_key = word_key
    else:
        raise NameError

    keys = [word_key(w) for w in self._pred_words]
    if resp_key is not None:
        keys += [resp_key(w) for w in self._resp_words]

    def both(left, right):
        return left & right

    # Build every per-column mask first, then fold them together.
    not_null = [~self.col_array[key].isnull() for key in keys]
    return reduce(both, not_null)
def fset(self, value):
    """Store ``value`` as the predictor words and add a column for each.

    For every word, its series dates and series values are passed to
    ``self.add_column`` under the key ``word_key(word)`` with
    ``force=False``.

    Args:
        value: Iterable of word objects exposing ``get_series_dates()``
            and ``get_series_values()``.
    """
    self._pred_words = value
    for word in self._pred_words:
        self.add_column(
            word.get_series_dates(),
            word.get_series_values(),
            key=word_key(word),
            force=False,
        )
def fdel(self):
    """Delete the predictor words and the columns that depend on them.

    Removes each predictor word's ``word_key`` column and each response
    word's ``prd_word_key`` column (the log message states these go
    because the predictors are removed), then deletes ``self._pred_words``.
    Columns are deleted with ``expect_metadata=False``.
    """
    log.info('WORDSET: Removing Predictive Words')
    columns = [word_key(word) for word in self._pred_words]

    log.info('WORDSET: Removing Response Prediction Words (because Predictors are Removed)')
    columns.extend([prd_word_key(word) for word in self._resp_words])

    for column in columns:
        self.delete_column(column, expect_metadata=False)

    # The attribute is only dropped once every column delete has succeeded.
    del self._pred_words
def fdel(self):
    """Delete the response words and every column derived from them.

    For each response word, three columns are removed: the ``word_key``,
    ``raw_word_key`` and ``prd_word_key`` columns. Keys are collected
    first, ``self._resp_words`` is deleted, and only then are the columns
    deleted (with ``expect_metadata=False``).
    """
    log.info('WORDSET: Removing Response Words')

    # All word_key columns first, then all raw, then all predicted.
    words = self._resp_words
    columns = [
        make_key(word)
        for make_key in (word_key, raw_word_key, prd_word_key)
        for word in words
    ]

    # The attribute is dropped before the columns are deleted.
    del self._resp_words

    for column in columns:
        self.delete_column(column, expect_metadata=False)
def fset(self, value):
    """Store ``value`` as the response words and add their columns.

    Each word contributes two columns via ``self.add_column``:

    * raw dates/values under ``raw_word_key(word)`` with ``force=False``;
    * series (transformed) dates/values under ``word_key(word)`` with
      ``force=True``.

    Args:
        value: Iterable of word objects exposing ``get_raw_dates()``,
            ``get_raw_values()``, ``get_series_dates()`` and
            ``get_series_values()``.
    """
    self._resp_words = value
    for word in self._resp_words:
        # Raw values
        self.add_column(
            word.get_raw_dates(),
            word.get_raw_values(),
            key=raw_word_key(word),
            force=False,
        )
        # Transformed values.
        # NOTE(review): force=True presumably overwrites an existing
        # column; confirm against add_column.
        self.add_column(
            word.get_series_dates(),
            word.get_series_values(),
            key=word_key(word),
            force=True,
        )