Example #1
0
 def test_inverse_transform(self):
     """
     Encode the test data, decode it back, and check row by row that the
     'o' column of the decoded frame equals the 'o' column of the input.
     """
     cs_encoder = CSEncoder(self.params)
     encoded = cs_encoder.fit_transform(self.data)

     # inverse_transform is fed the encoded series as a dataframe, plus
     # the first encoded CS as a separate argument.
     encoded_df = cs_to_df(encoded, self.params.cse_tags)
     first_cs = encoded[0]
     decoded = cs_encoder.inverse_transform(encoded_df, first_cs)

     for position in range(len(decoded)):
         actual = decoded.iloc[position]['o']
         expected = self.data.iloc[position]['o']
         self.assertEqual(actual, expected)
Example #2
0
def predict_next(ticks: DataFrame, encoder: CSEncoder, nn: Dict[str, CS_NN],
                 params: CSDictionary) -> float:
    """
    From a list of ticks, make a prediction of what will be the next CS.

    :param ticks: a dataframe of ticks with the expected headers and size
        corresponding to the window size of the network to be used. When
        the number of rows differs from ``encoder.params.window_size``,
        only the last ``window_size`` rows are kept (a frame with fewer
        rows than the window is left with all of its rows).
    :param encoder: the encoder used to train the network
    :param nn: the networks to make the prediction with, keyed by name.
        The dictionary is handed to ``predict_body`` and ``predict_move``;
        the entry under ``'body'`` is also used to log the network name.
    :param params: the parameters file read from configuration.

    :return: the close value (column ``'c'``) of the CS predicted.
    """
    # Check that the input group of ticks match the size of the window of
    # the network that is going to make the predict. That parameter is in
    # the window_size attribute within the 'encoder'.
    window_size = encoder.params.window_size
    if ticks.shape[0] != window_size:
        info_msg = 'Tickgroup resizing: {} -> {}'
        params.log.info(info_msg.format(ticks.shape[0], window_size))
        # window_size is a plain value (it is compared and formatted above),
        # so it must not be called. reset_index returns a new frame, so its
        # result has to be assigned; drop=True avoids adding the old index
        # as an extra column.
        ticks = ticks.iloc[-window_size:, :].reset_index(drop=True)

    # encode the tick in CSE and OH. Reshape it to the expected LSTM format.
    cs_tick = encoder.transform(ticks[params.ohlc_tags])
    pred_body_cs = predict_body(cs_tick, encoder, nn)
    pred_move_cs = predict_move(cs_tick, encoder, nn)
    # Body and movement predictions are joined into a single CS prediction.
    prediction_cs = pred_body_cs + pred_move_cs

    # Build a single row dataframe with the entire prediction
    prediction_df = pd.DataFrame(columns=params.cse_tags,
                                 data=np.array(prediction_cs).reshape(
                                     -1, len(params.cse_tags)))

    cs_values = '|'.join(
        f'{value}' for value in prediction_df[params.cse_tags].values[0])
    params.log.info(f"N2t {nn['body'].name} -> {cs_values}")

    # Convert the prediction to a real tick
    # TODO: Keep the date in predictions, so we can see for what date the
    #       prediction is being produced
    # TODO: Instead of returning only the CLOSE value, return the entire
    #       candlestick.
    pred = encoder.inverse_transform(prediction_df, cs_tick[-1])
    return pred['c'].values[-1]