Example #1
def q_learn_vs_q_learn(action_size, batch_size, env, hidden, state_size):
    epsilon1 = 0.8
    a1 = QLearner.QLearner(env.state, state_size, action_size, epsilon1, 0.99,
                           env)

    epsilon2 = 0.8
    a2 = QLearner.QLearner(env.state, state_size, action_size, epsilon2, 0.99,
                           env)

    EVAL_OPPONENT_OPTIMALITY_PROPORTION = 1
    eval_opponent = RandomOptimalAgent.RandomOptimalAgent(
        EVAL_OPPONENT_OPTIMALITY_PROPORTION,
        state_size,
        action_size,
        mem_len=0)
    # number of training games
    number_of_games = 15000
    t = Trainer(env, a1, a2, eval_opponent)
    t.experiment(hidden=hidden,
                 epsilon1=epsilon1,
                 epsilon2=epsilon2,
                 eps_auto_decay=True,
                 eps_limit=10000,
                 n=number_of_games,
                 batch_size=batch_size,
                 selfplay=False)
    return
Example #2
def test_q_learner():
    #this test should fail if test_q_value_iteration_aima_mdp fails
    test_q_value_iteration_aima_mdp()
    mdp = get_aima_mdp()
    Q_opt = mdp.q_value_iteration()
    agent = QLearner(mdp, 0.06)
    learned_Q = agent.learn(350000, epsilon=0.3, anneal_rate=0.0000001)
    np.testing.assert_array_almost_equal(Q_opt, learned_Q, decimal=2)
Example #3
def test_code():

    verbose = True  # print lots of debug stuff if True

    # read in the map
    filename = 'testworlds/world03.csv'
    inf = open(filename)
    data = np.array(
        [list(map(float, s.strip().split(','))) for s in inf.readlines()])
    originalmap = data.copy()  # make a copy so we can revert to the original map later

    if verbose: printmap(data)

    rand.seed(5)

    ######## run non-dyna test ########
    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          alpha=0.2,
                          gamma=0.9,
                          rar=0.98,
                          radr=0.999,
                          dyna=0,
                          verbose=False)  # initialize the learner
    iterations = 500
    start = time.time()
    total_reward = test(data, iterations, learner, verbose)
    stop = time.time()
    print(iterations, "median total_reward", total_reward)
    print()
    non_dyna_score = total_reward
    print('time (s): {}'.format(stop - start))

    ######## run dyna test ########
    learner = ql.QLearner(num_states=100,\
        num_actions = 4, \
        alpha = 0.2, \
        gamma = 0.9, \
        rar = 0.5, \
        radr = 0.99, \
        dyna = 200, \
        verbose=False) #initialize the learner
    iterations = 50
    data = originalmap.copy()
    start = time.time()
    total_reward = test(data, iterations, learner, verbose)
    stop = time.time()
    print(iterations, "median total_reward", total_reward)
    dyna_score = total_reward
    print('time (s): {}'.format(stop - start))

    print()
    print()
    print("results for", filename)
    print("non_dyna_score:", non_dyna_score)
    print("dyna_score    :", dyna_score)
Example #4
def test_code():

    verbose = True  # print lots of debug stuff if True

    # read in the map
    filename = "testworlds/world01.csv"
    inf = open(filename)
    data = np.array(
        [list(map(float,
                  s.strip().split(","))) for s in inf.readlines()])
    originalmap = (data.copy()
                   )  # make a copy so we can revert to the original map later

    if verbose:
        printmap(data)

    rand.seed(5)

    ######## run non-dyna test ########
    learner = ql.QLearner(
        num_states=100,
        num_actions=4,
        alpha=0.2,
        gamma=0.9,
        rar=0.98,
        radr=0.999,
        dyna=0,
        verbose=False,
    )  # initialize the learner
    epochs = 500
    total_reward = test(data, epochs, learner, verbose)
    print(f"{epochs}, median total_reward {total_reward}")
    print()
    non_dyna_score = total_reward

    ######## run dyna test ########
    learner = ql.QLearner(
        num_states=100,
        num_actions=4,
        alpha=0.2,
        gamma=0.9,
        rar=0.5,
        radr=0.99,
        dyna=200,
        verbose=False,
    )  # initialize the learner
    epochs = 50
    data = originalmap.copy()
    total_reward = test(data, epochs, learner, verbose)
    print(f"{epochs}, median total_reward {total_reward}")
    dyna_score = total_reward

    print()
    print()
    print(f"results for {filename}")
    print(f"non_dyna_score: {non_dyna_score}")
    print(f"dyna_score    : {dyna_score}")
Example #5
def test_code():
    verbose = False  # print lots of debug stuff if True

    if len(sys.argv) != 2:
        print "Usage: python testlearner.py <filename>"
        sys.exit(1)

    # read in the map
    filename = sys.argv[1]
    f = open(filename)
    data = np.array([list(map(float, s.strip().split(','))) for s in f.readlines()])

    # make a copy so we can revert to the original map later
    originalmap = data.copy()
    if verbose:
        printmap(data)

    rand.seed(5)

    # run non-dyna test #
    start = timer()
    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          alpha=0.2,
                          gamma=0.9,
                          rar=0.98,
                          radr=0.999,
                          dyna=0,
                          verbose=False)  # initialize the learner
    iterations = 500
    non_dyna_score = test(data, iterations, learner, verbose)
    end = timer()
    non_dyna_time = (end - start)

    # run dyna test #
    start = timer()
    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          alpha=0.2,
                          gamma=0.9,
                          rar=0.5,
                          radr=0.99,
                          dyna=200,
                          verbose=False)  # initialize the learner
    iterations = 50
    data = originalmap.copy()
    dyna_score = test(data, iterations, learner, verbose)
    end = timer()
    dyna_time = (end - start)

    print('--------------------')
    print("results for", filename)
    print('{:15} {}'.format('non_dyna_score:', non_dyna_score), end=' ')
    print('{:15} {:.2f}s'.format('non_dyna_time:', non_dyna_time))
    print('{:15} {}'.format('dyna_score:', dyna_score), end=' ')
    print('{:15} {:.2f}s'.format('dyna_time:', dyna_time))
Example #6
def test_code():

    verbose = False  # print lots of debug stuff if True

    # read in the map
    filename = 'testworlds/world01.csv'
    inf = open(filename)
    data = np.array(
        [list(map(float, s.strip().split(','))) for s in inf.readlines()])
    originalmap = data.copy()  # make a copy so we can revert to the original map later

    if verbose: printmap(data)

    rand.seed(5)

    ######## run non-dyna test ########
    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          alpha=0.2,
                          gamma=0.9,
                          rar=0.5,
                          radr=0.999,
                          dyna=0,
                          verbose=False)  # initialize the learner

    iterations = 1

    total_reward = test(data, iterations, learner, verbose)
    print "results for", filename
    print iterations, "iterations with total_reward", total_reward

    ######## run dyna test ########
    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          alpha=0.2,
                          gamma=0.9,
                          rar=0.5,
                          radr=0.99,
                          dyna=10,
                          verbose=False)  # initialize the learner

    iterations = 1

    data = originalmap.copy()
    start = time.time()
    total_reward = test(data, iterations, learner, verbose)
    end = time.time()
    print('dyna iterations', end - start)

    print "results for", filename
    print iterations, "iterations with total_reward", total_reward

    import cProfile
    cProfile.run('test(data, iterations, learner, verbose)')
Example #7
def test_code():
    robot_qlearning_testing_seed = 1490652871
    rand.seed(robot_qlearning_testing_seed)
    np.random.seed(robot_qlearning_testing_seed)

    verbose = True # print lots of debug stuff if True

    # read in the map
    filename = 'testworlds/world01.csv'
    inf = open(filename)
    data = np.array([list(map(float, s.strip().split(','))) for s in inf.readlines()])
    originalmap = data.copy() #make a copy so we can revert to the original map later

    if verbose: printmap(data)

    rand.seed(5)

    ######## run non-dyna test ########
    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          alpha=0.2,
                          gamma=0.9,
                          rar=0.98,
                          radr=0.999,
                          dyna=0,
                          verbose=False)  # initialize the learner
    epochs = 500
    total_reward = test(data, epochs, learner, verbose)
    print(epochs, "median total_reward", total_reward)
    print()
    non_dyna_score = total_reward

    ######## run dyna test ########
    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          alpha=0.2,
                          gamma=0.9,
                          rar=0.5,
                          radr=0.99,
                          dyna=200,
                          verbose=False)  # initialize the learner
    epochs = 50
    data = originalmap.copy()
    total_reward = test(data, epochs, learner, verbose)
    print(epochs, "median total_reward", total_reward)
    dyna_score = total_reward

    print()
    print()
    print("results for", filename)
    print("non_dyna_score:", non_dyna_score)
    print("dyna_score    :", dyna_score)
Example #8
    def __init__(self, impact=0.0, num_shares = 1000, epochs = 100, num_steps=10, commission = 0.00, verbose = False, **kwargs):
        """Create a strategy learner (Q-learning based) that can learn a trading policy

        Inputs / Parameters:
            impact: The amount the price moves against the trader compared to the historical data at each transaction
            num_shares: The number of shares that can be traded in one order
            epochs: The number of times to train the QLearner
            num_steps: The number of steps used in getting thresholds for the discretization process. It is the number of groups to put data into.
            commission: The fixed amount in dollars charged for each transaction
            verbose: If False, no plots. If True, print and plot data in add_evidence
            **kwargs: These are the arguments for QLearner
        """
        # Set constants for positions (which become our order signals)
        self.SHORT = -1.0
        self.CASH = 0.0
        self.LONG = 1.0

        self.epochs = epochs
        self.num_steps = num_steps
        self.num_shares = num_shares
        self.impact = impact
        self.commission = commission
        self.verbose = verbose

        # Initialize a QLearner for this Strategy Learner
        self.QLearner = ql.QLearner(**kwargs)
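
The docstring above documents a constructor rather than an algorithm, so a short usage sketch may help. It is only a sketch: the class name StrategyLearner and all keyword values are assumptions for illustration, and the trailing keywords are simply forwarded to ql.QLearner through **kwargs.

# Hypothetical usage of the constructor documented above; the values are
# illustrative, and the trailing keywords are forwarded to ql.QLearner.
sl = StrategyLearner(impact=0.005,
                     num_shares=1000,
                     epochs=100,
                     num_steps=10,
                     commission=9.95,
                     verbose=False,
                     num_states=3000,   # assumed QLearner kwargs
                     num_actions=3)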
Example #9
    def testPolicy(self, symbol = "IBM", \
        sd=dt.datetime(2009,1,1), \
        ed=dt.datetime(2010,1,1), \
        sv = 10000):
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        # Call indicators function to receive indicators with dates before starting range
        normalAverage, bollBand, MACDInd, momentum, prices = plotIndicators(
            sd - dt.timedelta(days=80), ed, syms)

        # Get indicators for the given dates
        bollBand = bollBand.loc[prices.index >= sd]
        normalAverage = normalAverage.loc[prices.index >= sd]
        MACDInd = MACDInd.loc[prices.index >= sd]
        momentum = momentum.loc[prices.index >= sd]
        prices = prices.loc[prices.index >= sd]

        # Discretize the values in the dataframes

        bollBand = self.dfDiscretize(bollBand, steps=self.bins)
        normalAverage = self.dfDiscretize(normalAverage, steps=self.bins)
        MACDInd = self.dfDiscretize(MACDInd, steps=self.bins)
        momentum = self.dfDiscretize(momentum, steps=self.bins)
        firstState = (bollBand.iloc[0][symbol], normalAverage.iloc[0][symbol],
                      MACDInd.iloc[0][symbol], momentum.iloc[0][symbol])
        dims = (self.bins, self.bins, self.bins, self.bins)
        firstState = np.ravel_multi_index(firstState, dims=dims)
        self.learner = ql.QLearner(num_states=self.bins**4,
                                   num_actions=5,
                                   rar=0.98,
                                   verbose=False)

        action = self.learner.querysetstate(firstState)
        shares = 0
        orders = pd.DataFrame(columns=["Orders"], index=prices.index)
        for j in range(len(prices.index)):
            # Buy allowed
            if action == 0 and shares in [0, -500]:
                shares += 500
                orders.iloc[j]["Orders"] = 500
            elif action == 1 and shares == -500:
                shares += 1000
                orders.iloc[j]["Orders"] = 1000
            # sell allowed
            elif action == 2 and shares in [0, 500]:
                shares -= 500
                orders.iloc[j]["Orders"] = -500
            elif action == 3 and shares == 500:
                shares -= 1000
                orders.iloc[j]["Orders"] = -1000
            else:
                orders.iloc[j]["Orders"] = 0
            nextState = np.ravel_multi_index(
                (bollBand.iloc[j][symbol], normalAverage.iloc[j][symbol],
                 MACDInd.iloc[j][symbol], momentum.iloc[j][symbol]),
                dims=dims)

            action = self.learner.querysetstate(nextState)

        return orders
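
A note on the state encoding used above: the four discretized indicator values are flattened into one integer state with np.ravel_multi_index, so the QLearner only ever sees a single index in [0, bins**4). A minimal standalone illustration (the bin indices here are arbitrary, and bins=10 is an assumption):

import numpy as np

bins = 10
dims = (bins, bins, bins, bins)
# One day's bin indices for (bollBand, normalAverage, MACDInd, momentum).
state = np.ravel_multi_index((3, 7, 1, 9), dims=dims)
print(state)                          # 3719 for these indices
print(np.unravel_index(state, dims))  # recovers the four bin indices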
Example #10
def main():
    if experiment_number == 1:
        parameters = Trade2Agents.Trade2Agents()
    elif experiment_number == 2:
        parameters = Trade3Agents.Trade3Agents()
    else:  # Default
        parameters = Trade2Agents.Trade2Agents()

    system = System.System(parameters)
    agents = []

    for g in parameters.agents:
        if parameters.agent_types[g] == "repnet":
            agents.append(RepNetAgent.RepNetAgent(g, system, parameters))
        elif parameters.agent_types[g] == "oracle":
            if experiment_number == 1:
                agents.append(Oracle2Agents.Oracle(g))
            elif experiment_number == 2:
                agents.append(Oracle3Agents.Oracle(g))
            else:  # Default
                agents.append(Oracle2Agents.Oracle(g))
        elif parameters.agent_types[g] == "mdp":
            agents.append(MDPAgent.MDPAgent(g, system, parameters))
        elif parameters.agent_types[g] == "qlearner":
            agents.append(QLearner.QLearner(g, system, parameters))
    repNetMDP = OnlineSolver.OnlineSolver(system,
                                          agents,
                                          parameters,
                                          experiment_number=experiment_number)

    repNetMDP.online_repnet_solver()
Example #11
 def __init__(self, verbose=False):
     self.verbose = verbose
     self.ql = ql.QLearner(num_states=3000,
                           num_actions=3,
                           rar=0.5,
                           radr=0.99,
                           dyna=30)
Example #12
    def addEvidence(self, symbol = "IBM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,12,31), \
        sv = 10000):
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols
        prices_SPY = prices_all[[
            'SPY',
        ]]  # only SPY, for comparison later
        if self.verbose: print(prices)

        #NOTE:Implementing Steps:
        #1. select & compute indicators (discretize)
        TSI, momentum, _, priceSMA = indicators(prices)
        TSI_SPY, _, _, _ = indicators(prices_SPY)
        #TSI, momentum, bbp, priceSMA=indicators(prices)

        #2. set up the learner
        #NOTE: 0 do nothing, 1 long, 2 short; the exact numbers should be processed with additional work
        states_num, state_list = self.set_state([TSI_SPY, priceSMA])
        self.learner = QLearner.QLearner(num_states=states_num, num_actions=3)
        #3. LOOP Until cumulative return is no longer improving:
        #Converge criteria: Policy changes?Returns?DF differences?Runtime exceeds?
        policy = np.zeros(prices.shape[0])
        policy_changes = policy.shape[0]
        loop_num = 40
        prices_SPY['label'] = np.zeros(prices.shape[0])
        for i in range(len(state_list)):
            prices_SPY['label'] += state_list[i] * (10**(len(state_list) - i -
                                                         1))

        while policy_changes > 0.05 * prices.shape[0] and loop_num > 1:
            #Day 0 is:
            policy_temp = policy.copy()
            self.learner.a = 0
            self.learner.s = int(prices_SPY['label'][0])
            holdings = 0
            #4. for each day of the data:
            for i in range(1, prices.shape[0]):
                #1. Compute the current state
                state = int(prices_SPY['label'][i])
                #2. Compute the reward for last state
                rew = holdings * float(prices.iloc[i] - prices.iloc[i - 1])
                #3. Train the learner with above data
                action = self.learner.query(state, rew)
                policy[i] = action
                #4. Implement the action the learner returned?
                if abs(holdings) < 2000:
                    if action:
                        if action == 1 and holdings < 1000:
                            holdings += 1000
                            prices = prices * (1 + self.impact)
                        elif action == 2 and holdings > -1000:
                            holdings -= 1000
                            prices = prices * (1 - self.impact)
            #Calculate loop parameter changes
            loop_num -= 1
            policy_changes = policy[policy != policy_temp].shape[0]
Example #13
    def __init__(self, num_shares=1000, epochs=100, num_steps=10, 
                 impact=0.0, commission=0.00, verbose=False, learner=ql.QLearner(num_states=3000, num_actions=3)):
        """
        
        Instantiate a StrategyLearner that can learn a trading policy.

        num_shares: The number of shares that can be traded in one order
        epochs:     The number of times to train the QLearner
        num_steps:  The number of steps used in getting thresholds for the
        discretization process. It is the number of groups to put data into.
        impact:     The amount the price moves against the trader compared to the
        historical data at each transaction
        commission: The fixed amount in dollars charged for each transaction
        verbose:    If True, print and plot data in add_evidence
        learner:    The QLearner instance to use (note the default is created once, at definition time)
        """
        
        self.epochs = epochs
        self.num_steps = num_steps
        self.num_shares = num_shares
        self.impact = impact
        self.commission = commission
        self.verbose = verbose
        self.window_size = 10
        self.q_learner = learner
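
One caveat worth noting in the signature above: the default learner argument is evaluated once, when the function is defined, so every instance created without an explicit learner shares the same QLearner object. A minimal sketch of passing a fresh learner per instance (the class name StrategyLearner is assumed):

# Hypothetical usage: give each instance its own QLearner rather than
# reusing the shared default created at definition time.
import QLearner as ql

sl = StrategyLearner(num_shares=1000,
                     epochs=100,
                     num_steps=10,
                     learner=ql.QLearner(num_states=3000, num_actions=3))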
Example #14
def testqlearner(iteration_num=500, dyna=0):

    verbose = False  #print lots of debug stuff if True

    # read in the map
    inf = open('testworlds/world03.csv')
    data = np.array(
        [list(map(float, s.strip().split(','))) for s in inf.readlines()])
    originalmap = data.copy()  # make a copy so we can revert to the original map later

    startpos = getrobotpos(data)  #find where the robot starts
    goalpos = getgoalpos(data)  #find where the goal is

    if verbose: printmap(data)

    rand.seed(5)

    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          rar=0.98,
                          radr=0.9999,
                          dyna=dyna,
                          verbose=verbose)  # initialize the learner

    my_results = np.zeros(iteration_num)

    #each iteration involves one trip to the goal
    for iteration in range(0, iteration_num):
        steps = 0
        data = originalmap.copy()
        robopos = startpos
        state = discretize(robopos)  #convert the location to a state
        action = learner.querysetstate(
            state)  #set the state and get first action
        while robopos != goalpos:

            #move to new location according to action and then get a new action
            newpos = movebot(data, robopos, action)
            if newpos == goalpos:
                r = 1  #reward for reaching the goal
            else:
                r = -1  #negative reward for not being at the goal
            state = discretize(newpos)
            action = learner.query(state, r)

            data[robopos] = 4  # mark where we've been for map printing
            data[newpos] = 2  # move to new location
            robopos = newpos  # update the location
            if verbose: printmap(data)
            if verbose: time.sleep(1)
            steps += 1

        print(iteration, ",", steps)
        my_results[iteration] = steps

    printmap(data)

    return my_results
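
testqlearner relies on helpers that are not shown here (getrobotpos, getgoalpos, movebot, printmap, discretize). Given num_states=100 and the 10x10 grid worlds used throughout these examples, discretize presumably maps a (row, col) position to a single state index; the sketch below is an assumption about that mapping, not the original helper:

def discretize(pos):
    # Assumed row-major encoding for grids no larger than 10x10,
    # giving a state index in [0, 100).
    return int(pos[0]) * 10 + int(pos[1])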
Example #15
 def __init__(self, verbose=False, impact=0.0, **kwargs):
     self.verbose = verbose
     self.impact = impact
     self.bbp_bins = None
     self.sma_bins = None
     self.momentum_bins = None
     self.nbins = 9
     self.learner = ql.QLearner(**kwargs)
Example #16
def baseTester():
    ''' runs a somewhat comprehensive test'''
    try:
        import QLearner as ql
    except:
        pass

    #it is worth noting here that num_states can be 100 for any grid < 10x10 using the tuckerHash
    #we need a new hash algo if we are to use a grid outside those parameters
    baseKwargs = {'num_states':100, 'alpha':1.0, 'gamma':0.9, 'rar':0.5, 'radr':0.99, 'dyna':0, 'verbose':False}
    '''
    if you want to add your own test, add it here; each test is a tuple of the form:
    (csv file, expected convergence iterations, expected policy length, kwarg modifier, test name)
    '''
    myTestList = [('testEasyWorld.csv', 800, 13,{}, 'easy test'),
                  ('world01.csv', 7000, 16, {}, 'Tucker Test 1'),
                  ('world02.csv', 7000, 17, {}, 'Tucker Test 2'),
                  ('testGridWorld.csv', 5000, 20, {}, 'Leo Base Test'),
                  ('testGridWorld.csv', 18000, 20, {'alpha':.2}, 'Test Learning Rate'),
                  ('testEasyWorld.csv', 700, 13, {'rar': 0.05}, 'Test Exploration'),
                  ('testEasyWorld.csv', 700, 13, {'radr': 0.8}, 'Test Exploration Decay'),
                  ('testGridWorld.csv', 3000, 20, {'gamma':0.8}, 'Test Discount Rate'),
                  ('testGridWorld.csv', 1100, 20, {'dyna':100}, 'Test Dyna'),
                  ]
    
    fdtest=myTestList[7:9]              
                  
    #for test in myTestList:
    for test in fdtest:             
        print('-------------------------------')
        print(test[4])
        world = GridWorld(test[0])
        testKwargs = copy(baseKwargs)
        for k in test[3].keys():
            testKwargs[k] = test[3][k]
        print('parameters %s' % str(testKwargs))
        learner = ql.QLearner(**testKwargs)
        print(world.grid)
        myTester = QTester(world, learner)
        nIter = test[1]
        totalIter = nIter
        lastPolicyLength = 0
        #someone let me know if there's a better way to check for convergence time
        while totalIter < (test[1] * 1.4):
            myTester.nIter(nIter)
            nIter = int(.05 * test[1])
            myPolicy = myTester.getPolicy()
            policyLength = len(myPolicy)
            totalIter += nIter
            if (lastPolicyLength == policyLength) and (policyLength < 100):
                print('converged in approx %i iterations' % totalIter)
                print(policyLength, myPolicy, test[2])
                break
            lastPolicyLength = policyLength
        if (test[1] * 1.2 >= totalIter) and (policyLength == test[2]):
            print('*** TEST PASSED ***')
        else:
            print('xxx TEST FAILED xxx')
Example #17
def test_code():
    verbose = False
    filename = 'testworlds/world01.csv'
    inf = open(filename)
    lines = inf.readlines()
    data = np.array([list(map(float, s.strip().split(','))) for s in lines])
    originalmap = data.copy()  # make copy to revert to original later
    if verbose:
        printmap(data)

    rand.seed(5)

    # run non-dyna test
    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          alpha=0.2,
                          gamma=0.9,
                          rar=0.98,
                          radr=0.999,
                          dyna=0,
                          verbose=False)
    epochs = 500
    total_reward = test(data, epochs, learner, verbose)
    print(f"{epochs}, median total_reward {total_reward}")
    print()
    non_dyna_score = total_reward
    # run dyna test
    learner = ql.QLearner(num_states=100,
                          num_actions=4,
                          alpha=0.2,
                          gamma=0.9,
                          rar=0.5,
                          radr=0.99,
                          dyna=200,
                          verbose=False)
    epochs = 50
    data = originalmap.copy()
    total_reward = test(data, epochs, learner, verbose)
    print(f"{epochs}, median total_reward {total_reward}")
    dyna_score = total_reward
    print()
    print()
    print(f"results for {filename}")
    print(f"non_dyna_score: {non_dyna_score}")
    print(f"dyna_score    : {dyna_score}")
Example #18
 def __init__(self, verbose = False, impact=0.0, commission=0.0, numLong = 1000.0, numShort=1000.0):
     self.verbose = verbose
     self.impact = impact
     self.commission = commission
     self.bins = None
     self.ql = ql.QLearner(num_states=10000, num_actions=3, alpha=0.2, gamma=0.9, rar=0.5, radr=0.99, dyna=0, verbose=False)
     self.features = None
     self.numLong = numLong
     self.numShort = numShort
Example #19
 def __init__(self, verbose=False):
     self.verbose = verbose
     self.Q = QLearner.QLearner(num_states=10000,
                                num_actions=3,
                                alpha=0.2,
                                gamma=0.9,
                                rar=0.5,
                                radr=0.99,
                                dyna=50)
Example #20
 def __init__(self, verbose=False):
     self.verbose = verbose
     self.thresholds = np.zeros((STEPS, FEATURE_CNT))
     self.sv = 100000
     self.ql = ql.QLearner(num_states=3000,
                           num_actions=3,
                           rar=0.5,
                           radr=0.99,
                           dyna=40)
Example #21
 def __init__(self, verbose=False):
     self.verbose = verbose
     self.ql = ql.QLearner(num_states=30000,
                           num_actions=3,
                           dyna=0,
                           rar=0.8,
                           radr=0.99)
     self.momentumWindow = 19
     self.rollingWindow = 20
Example #22
    def __init__(self, environment):
        self.env = environment

        self.qlearner = ql.QLearner(num_states=self.env.observation_space.n,
                                    num_actions=4,
                                    dyna=0,
                                    verbose=False,
                                    rar=0.9,
                                    radr=0.9)
        self.qlearner.alpha = 0.1
Example #23
 def __init__(self, verbose=False):
     self.verbose = verbose
     self.ql = ql.QLearner(num_states=int(1e6),
                           num_actions=3,
                           alpha=0.1,
                           gamma=0.9,
                           rar=0.5,
                           radr=0.9,
                           dyna=0,
                           verbose=False)
Example #24
 def __init__(self, verbose=False):
     self.verbose = verbose
     self.learner = ql.QLearner(num_states=1000,
                                num_actions=3,
                                alpha=0.5,
                                gamma=0.9,
                                rar=0.5,
                                radr=0.999,
                                dyna=0,
                                verbose=False)  # initialize the learner
Example #25
    def addEvidence(self, symbol = "JPM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,1,1), \
        sv = 10000):

        # add your code to do learning here
        indicators = compute_indicators(symbols=['JPM'],
                                        sd=sd,
                                        ed=ed,
                                        lookback=14,
                                        gen_plot=False)
        states = self.discreticize(indicators)
        states = states.astype(int)
        print "max_state", pd.to_numeric(states, downcast='integer')
        num_states = states.max() + 1

        learner = ql.QLearner(num_states=num_states,
                              num_actions=3,
                              alpha=0.2,
                              gamma=0.9,
                              rar=0.98,
                              radr=0.999,
                              dyna=0,
                              verbose=False)

        action = learner.querysetstate(
            states[0])  #set state and get first action

        # set initial state on first day
        print(action)

        # Calculate daily returns
        prices = ut.get_data([symbol], pd.date_range(sd, ed))
        daily_returns = ((prices / prices.shift(1)) - 1) * 100
        # print daily_returns[symbol]
        # print state
        for index, state in states[1:].items():
            reward = daily_returns.loc[index, symbol]
            action = learner.query(state, reward)
            print(reward, action)

        # example usage of the old backward compatible util function
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols
        prices_SPY = prices_all['SPY']  # only SPY, for comparison later
        if self.verbose: print(prices)

        # example use with new colname
        volume_all = ut.get_data(syms, dates,
                                 colname="Volume")  # automatically adds SPY
        volume = volume_all[syms]  # only portfolio symbols
        volume_SPY = volume_all['SPY']  # only SPY, for comparison later
        if self.verbose: print(volume)
Example #26
 def __init__(self, verbose=False, impact=0.0):
     self.verbose = verbose
     self.impact = impact
     self.ql = ql.QLearner(num_states=1000,
                           num_actions=3,
                           alpha=0.2,
                           gamma=0.9,
                           rar=0.5,
                           radr=0.99,
                           dyna=0,
                           verbose=False)
Example #27
 def __init__(self, verbose=False):
     self.verbose = verbose
     #initialize the Qlearner
     self.ql = ql.QLearner(num_states=288,
                           num_actions=3,
                           alpha=0.2,
                           gamma=0.9,
                           rar=0.99,
                           radr=0.99,
                           dyna=200,
                           verbose=False)
Example #28
 def testNoPerturbShort(self):
     g = Grid.Grid()
     l = QLearner.QLearner()
     e = Experiment(l, g, ((9, 2), (2, 9)), 1000, 2000)
     self.assertEqual(e.walker, l)
     self.assertEqual(e.grid, g)
     self.assertEqual(e.rewards, ((9, 2), (2, 9)))
     self.assertEqual(e.switch, 1000)
     self.assertEqual(e.stop, 2000)
     r = e.run()
     self.assertTrue(r > 0)
Example #29
 def __init__(self, verbose=False, impact=0.0):
     self.verbose = verbose
     self.impact = impact
     self.qlearner = ql.QLearner(num_states=1000, \
                                num_actions=3, \
                                alpha=0.2, \
                                gamma=0.9, \
                                rar=0.5, \
                                radr=0.99, \
                                dyna=0, \
                                verbose=False)
Example #30
 def testNoPerturbShort(self):
     g = Grid.Grid()
     l = QLearner.QLearner()
     e = Experiments(l, g, 1000, 2000, 5)
     self.assertEqual(e.walker, l)
     self.assertEqual(e.grid, g)
     self.assertEqual(e.switch, 1000)
     self.assertEqual(e.stop, 2000)
     self.assertEqual(e.repeats, 5)
     r = e.run()
     self.assertEqual(len(r), len(EXPERIMENTS))