def test_inverse_transform(self):
    """
    Check that encoding the data and then decoding it gives back the
    original open ('o') values, row by row.
    """
    cs_encoder = CSEncoder(self.params)
    encoded = cs_encoder.fit_transform(self.data)

    # inverse_transform takes a dataframe plus the first encoded CS, so the
    # encoded sequence is converted to a dataframe before decoding.
    encoded_df = cs_to_df(encoded, self.params.cse_tags)
    restored = cs_encoder.inverse_transform(encoded_df, encoded[0])

    # Compare every restored row against the row at the same position in
    # the input data.
    for row in range(len(restored)):
        expected_open = self.data.iloc[row]['o']
        restored_open = restored.iloc[row]['o']
        self.assertEqual(restored_open, expected_open)
def predict_next(ticks: DataFrame, encoder: CSEncoder, nn: Dict[str, CS_NN],
                 params: CSDictionary) -> float:
    """
    From a list of ticks, make a prediction of what will be the next CS.

    :param ticks: a dataframe of ticks with the expected headers and size
        corresponding to the window size of the network to be used. When
        the number of rows differs from the window size, only the last
        ``window_size`` rows are kept.
    :param encoder: the encoder used to train the network
    :param nn: dictionary of recurrent networks to make the prediction
        with. It is handed to ``predict_body`` and ``predict_move``; the
        ``'body'`` entry is also read here for its name when logging.
    :param params: the parameters file read from configuration.
    :return: the close value of the CS predicted.
    """
    # Check that the input group of ticks match the size of the window of
    # the network that is going to make the predict. That parameter is in
    # the window_size attribute within the 'encoder'.
    window_size = encoder.params.window_size
    if ticks.shape[0] != window_size:
        info_msg = 'Tickgroup resizing: {} -> {}'
        params.log.info(info_msg.format(ticks.shape[0], window_size))
        # window_size is an attribute, not a method, so it must not be
        # called. reset_index returns a new frame, so the result has to be
        # assigned back for the re-indexing to take effect.
        # NOTE(review): if fewer than window_size ticks are supplied the
        # slice cannot enlarge the frame -- confirm callers never do that.
        ticks = ticks.iloc[-window_size:, :].reset_index(drop=True)

    # encode the tick in CSE and OH. Reshape it to the expected LSTM format.
    cs_tick = encoder.transform(ticks[params.ohlc_tags])
    pred_body_cs = predict_body(cs_tick, encoder, nn)
    pred_move_cs = predict_move(cs_tick, encoder, nn)
    # presumably both partial predictions are sequences, so '+' joins the
    # body part and the move part -- verify against predict_body/move.
    prediction_cs = pred_body_cs + pred_move_cs

    # Build a single row dataframe with the entire prediction
    prediction_df = pd.DataFrame(
        columns=params.cse_tags,
        data=np.array(prediction_cs).reshape(-1, len(params.cse_tags)))
    first_row = prediction_df[params.cse_tags].values[0]
    cs_values = '|'.join(f'{value}' for value in first_row)
    params.log.info(f"N2t {nn['body'].name} -> {cs_values}")

    # Convert the prediction to a real tick, passing the last encoded CS of
    # the input window as the second argument to inverse_transform.
    # TODO: Keep the date in predictions, so we can see for what date the
    #       prediction is being produced
    # TODO: Instead of returning only the CLOSE value, return the entire
    #       candlestick.
    pred = encoder.inverse_transform(prediction_df, cs_tick[-1])
    return pred['c'].values[-1]