def test(df, q, periods, actions, pip_mul):
    """Run one evaluation step against ``df`` without updating ``q``.

    The step derives a state from ``df``, selects an action, computes the
    reward for that action and the resulting delta. ``updateQ`` is not
    called here, so ``q`` is handed back as received (assuming the helpers
    do not mutate it in place -- TODO confirm).

    Args:
        df: Data passed through to ``getState`` and ``getReward``.
        q: Q structure passed to ``getAction`` and ``getDelta``.
        periods: Forwarded to ``getState``.
        actions: Forwarded to ``getAction``.
        pip_mul: Forwarded to ``getReward``.

    Returns:
        A tuple ``(q, reward, delta, ticks)`` where ``reward`` and ``ticks``
        come from ``getReward`` and ``delta`` comes from ``getDelta``.
    """
    logging.info('Testing: started...')

    # Current state derived from the data.
    state = getState(df, periods)

    # The third argument is fixed at 0 here; presumably this is the
    # exploration rate, i.e. no exploration during testing -- verify
    # against getAction.
    action = getAction(q, state, 0, actions)

    # Reward for the chosen action, plus the tick count reported with it.
    reward, ticks = getReward(df, action, pip_mul)

    # Delta for this (state, action, reward) under the current q.
    delta = getDelta(q, state, action, reward)

    return q, reward, delta, ticks
def train(df, q, alpha, epsilon, periods, actions, pip_mul, std):
    """Run one training step against ``df`` and return the updated ``q``.

    The step derives a state from ``df``, selects an action, computes the
    reward and delta for it, and then replaces ``q`` with the result of
    ``updateQ``.

    Args:
        df: Data passed through to ``getState`` and ``getReward``.
        q: Q structure passed to ``getAction``, ``getDelta`` and ``updateQ``.
        alpha: Forwarded to ``updateQ`` (presumably the learning rate --
            confirm against ``updateQ``).
        epsilon: Forwarded to ``getAction`` (presumably the exploration
            rate -- confirm against ``getAction``).
        periods: Forwarded to ``getState``.
        actions: Forwarded to ``getAction``.
        pip_mul: Forwarded to ``getReward``.
        std: Forwarded to ``getReward``.

    Returns:
        A tuple ``(q, reward, delta, ticks)`` where ``q`` is the value
        returned by ``updateQ``, ``reward`` and ``ticks`` come from
        ``getReward`` and ``delta`` comes from ``getDelta``.
    """
    logging.info('Training: started...')

    # Current state derived from the data.
    state = getState(df, periods)

    # Action chosen for this state.
    action = getAction(q, state, epsilon, actions)

    # Reward for the chosen action, plus the tick count reported with it.
    reward, ticks = getReward(df, action, pip_mul, std)

    # Delta is computed against q *before* it is updated below, so the
    # order of these two calls matters.
    delta = getDelta(q, state, action, reward)

    # Replace q with the updated structure.
    q = updateQ(q, state, action, delta, reward, alpha)

    return q, reward, delta, ticks