def train(self, data: DataFrame):
    """
    Fit an encoder on the OHLC data passed and train the networks with it.

    :param data: Data in OHLC format from the ticks module.
    :return: the NN trained, and the encoder used
    """
    # Work on a deep copy of the input, without the "Date" column.
    ticks = data.copy(deep=True)
    date_column = self.params.csv_dict['d']
    ticks = ticks.drop([date_column], axis=1)

    # Fit the encoder and encode the ticks with it.
    encoder = CSEncoder(self.params)
    encoder = encoder.fit(ticks)
    cse = encoder.transform(ticks)

    # Build the network input from the encoded ticks, then train.
    dataset = self.prepare_input(encoder, cse, self.params.subtypes)
    nn = self.train_nn(dataset, self.params.subtypes)

    # Save the encoder before handing it back together with the networks.
    encoder.save()
    return nn, encoder
def predict_next(ticks: DataFrame, encoder: CSEncoder, nn: Dict[str, CS_NN],
                 params: CSDictionary) -> float:
    """
    From a list of ticks, make a prediction of what will be the next CS.

    :param ticks: a dataframe of ticks with the expected headers and size
        corresponding to the window size of the network to be used. If the
        number of rows differs from the window size, only the last
        ``window_size`` rows are used.
    :param encoder: the encoder used to train the network
    :param nn: the recurrent network to make the prediction with
    :param params: the parameters file read from configuration.
    :return: the close value of the CS predicted.
    """
    # Check that the input group of ticks match the size of the window of
    # the network that is going to make the predict. That parameter is in
    # the window_size attribute within the 'encoder'.
    # NOTE: window_size is an attribute, not a method; calling it raised
    # TypeError whenever a resize was needed.
    window_size = encoder.params.window_size
    if ticks.shape[0] != window_size:
        info_msg = 'Tickgroup resizing: {} -> {}'
        params.log.info(info_msg.format(ticks.shape[0], window_size))
        # reset_index() returns a new frame, so the result must be kept.
        # drop=True avoids adding the old index as an extra column.
        ticks = ticks.iloc[-window_size:, :].reset_index(drop=True)

    # Encode the ticks in CSE and predict the body and movement parts
    # separately; the full prediction is the concatenation of both.
    cs_tick = encoder.transform(ticks[params.ohlc_tags])
    pred_body_cs = predict_body(cs_tick, encoder, nn)
    pred_move_cs = predict_move(cs_tick, encoder, nn)
    prediction_cs = pred_body_cs + pred_move_cs

    # Build a single row dataframe with the entire prediction
    prediction_df = pd.DataFrame(
        columns=params.cse_tags,
        data=np.array(prediction_cs).reshape(-1, len(params.cse_tags)))
    first_row = prediction_df[params.cse_tags].values[0]
    cs_values = '|'.join('{}'.format(value) for value in first_row)
    params.log.info(f"N2t {nn['body'].name} -> {cs_values}")

    # Convert the prediction to a real tick, using the last encoded tick of
    # the window as the reference for the inverse transform.
    # TODO: Keep the date in predictions, so we can see for what date the
    # prediction is being produced
    # TODO: Instead of returning only the CLOSE value, return the entire
    # candlestick.
    pred = encoder.inverse_transform(prediction_df, cs_tick[-1])
    return pred['c'].values[-1]
def test_decode_cse(self):
    """
    Check that every tick is decoded correctly, given the previous one.
    """
    # Column names to decode into: every CSV key except the date ('d').
    col_names = [key for key in self.params.csv_dict.keys() if key != 'd']

    encoder = CSEncoder(self.params).fit(self.data)
    cs = encoder.transform(self.data)

    # Check every tick
    num_ticks = self.data.shape[0]
    for i in range(num_ticks):
        cs_df = encoded_cs_to_df(cs[i], self.params.cse_tags)
        # The first tick has no predecessor, so it is used as its own.
        prev_cs = cs[i - 1] if i > 0 else cs[0]

        # Define tolerance as 10% of the min-max range when reconstructing
        tol = prev_cs.hl_interval_width * 0.1

        # Decode the CS, and compare each decoded value (in o, h, l, c
        # order) against the original tick.
        tick = encoder._decode_cse(cs_df, prev_cs, col_names)
        expected = self.data.iloc[i]
        for position, tag in enumerate(('o', 'h', 'l', 'c')):
            self.assertLessEqual(abs(tick[position] - expected[tag]), tol)