def q_learn_vs_q_learn(action_size, batch_size, env, hidden, state_size):
    epsilon1 = 0.8
    a1 = QLearner.QLearner(env.state, state_size, action_size, epsilon1, 0.99, env)
    epsilon2 = 0.8
    a2 = QLearner.QLearner(env.state, state_size, action_size, epsilon2, 0.99, env)

    EVAL_OPPONENT_OPTIMALITY_PROPORTION = 1
    eval_opponent = RandomOptimalAgent.RandomOptimalAgent(
        EVAL_OPPONENT_OPTIMALITY_PROPORTION, state_size, action_size, mem_len=0)

    # number of training games
    number_of_games = 15000

    t = Trainer(env, a1, a2, eval_opponent)
    t.experiment(hidden=hidden, epsilon1=epsilon1, epsilon2=epsilon2,
                 eps_auto_decay=True, eps_limit=10000, n=number_of_games,
                 batch_size=batch_size, selfplay=False)
def test_q_learner():
    # This test should fail if test_q_value_iteration_aima_mdp fails.
    test_q_value_iteration_aima_mdp()
    mdp = get_aima_mdp()
    Q_opt = mdp.q_value_iteration()
    agent = QLearner(mdp, 0.06)
    learned_Q = agent.learn(350000, epsilon=0.3, anneal_rate=0.0000001)
    np.testing.assert_array_almost_equal(Q_opt, learned_Q, decimal=2)
def test_code():
    verbose = True  # print lots of debug stuff if True

    # read in the map
    filename = 'testworlds/world03.csv'
    with open(filename) as inf:
        data = np.array(
            [list(map(float, s.strip().split(','))) for s in inf.readlines()])
    originalmap = data.copy()  # make a copy so we can revert to the original map later
    if verbose:
        printmap(data)

    rand.seed(5)

    ######## run non-dyna test ########
    # initialize the learner
    learner = ql.QLearner(num_states=100, num_actions=4, alpha=0.2, gamma=0.9,
                          rar=0.98, radr=0.999, dyna=0, verbose=False)
    iterations = 500
    start = time.time()
    total_reward = test(data, iterations, learner, verbose)
    stop = time.time()
    print(iterations, "median total_reward", total_reward)
    print()
    non_dyna_score = total_reward
    print('time (s): {}'.format(stop - start))

    ######## run dyna test ########
    # initialize the learner
    learner = ql.QLearner(num_states=100, num_actions=4, alpha=0.2, gamma=0.9,
                          rar=0.5, radr=0.99, dyna=200, verbose=False)
    iterations = 50
    data = originalmap.copy()
    start = time.time()
    total_reward = test(data, iterations, learner, verbose)
    stop = time.time()
    print(iterations, "median total_reward", total_reward)
    dyna_score = total_reward
    print('time (s): {}'.format(stop - start))

    print()
    print()
    print("results for", filename)
    print("non_dyna_score:", non_dyna_score)
    print("dyna_score :", dyna_score)
def test_code():
    verbose = True  # print lots of debug stuff if True

    # read in the map
    filename = "testworlds/world01.csv"
    with open(filename) as inf:
        data = np.array(
            [list(map(float, s.strip().split(","))) for s in inf.readlines()])
    originalmap = data.copy()  # make a copy so we can revert to the original map later
    if verbose:
        printmap(data)

    rand.seed(5)

    ######## run non-dyna test ########
    learner = ql.QLearner(
        num_states=100,
        num_actions=4,
        alpha=0.2,
        gamma=0.9,
        rar=0.98,
        radr=0.999,
        dyna=0,
        verbose=False,
    )  # initialize the learner
    epochs = 500
    total_reward = test(data, epochs, learner, verbose)
    print(f"{epochs}, median total_reward {total_reward}")
    print()
    non_dyna_score = total_reward

    ######## run dyna test ########
    learner = ql.QLearner(
        num_states=100,
        num_actions=4,
        alpha=0.2,
        gamma=0.9,
        rar=0.5,
        radr=0.99,
        dyna=200,
        verbose=False,
    )  # initialize the learner
    epochs = 50
    data = originalmap.copy()
    total_reward = test(data, epochs, learner, verbose)
    print(f"{epochs}, median total_reward {total_reward}")
    dyna_score = total_reward

    print()
    print()
    print(f"results for {filename}")
    print(f"non_dyna_score: {non_dyna_score}")
    print(f"dyna_score : {dyna_score}")
def test_code():
    verbose = False  # print lots of debug stuff if True

    if len(sys.argv) != 2:
        print("Usage: python testlearner.py <filename>")
        sys.exit(1)

    # read in the map
    filename = sys.argv[1]
    with open(filename) as f:
        data = np.array(
            [list(map(float, s.strip().split(','))) for s in f.readlines()])
    # make a copy so we can revert to the original map later
    originalmap = data.copy()
    if verbose:
        printmap(data)

    rand.seed(5)

    # run non-dyna test #
    start = timer()
    # initialize the learner
    learner = ql.QLearner(num_states=100, num_actions=4, alpha=0.2, gamma=0.9,
                          rar=0.98, radr=0.999, dyna=0, verbose=False)
    iterations = 500
    non_dyna_score = test(data, iterations, learner, verbose)
    end = timer()
    non_dyna_time = end - start

    # run dyna test #
    start = timer()
    # initialize the learner
    learner = ql.QLearner(num_states=100, num_actions=4, alpha=0.2, gamma=0.9,
                          rar=0.5, radr=0.99, dyna=200, verbose=False)
    iterations = 50
    data = originalmap.copy()
    dyna_score = test(data, iterations, learner, verbose)
    end = timer()
    dyna_time = end - start

    print('--------------------')
    print("results for", filename)
    print('{:15} {}'.format('non_dyna_score:', non_dyna_score), end=' ')
    print('{:15} {:.2f}s'.format('non_dyna_time:', non_dyna_time))
    print('{:15} {}'.format('dyna_score:', dyna_score), end=' ')
    print('{:15} {:.2f}s'.format('dyna_time:', dyna_time))
def test_code():
    verbose = False  # print lots of debug stuff if True

    # read in the map
    filename = 'testworlds/world01.csv'
    with open(filename) as inf:
        data = np.array(
            [list(map(float, s.strip().split(','))) for s in inf.readlines()])
    originalmap = data.copy()  # make a copy so we can revert to the original map later
    if verbose:
        printmap(data)

    rand.seed(5)

    ######## run non-dyna test ########
    # initialize the learner
    learner = ql.QLearner(num_states=100, num_actions=4, alpha=0.2, gamma=0.9,
                          rar=0.5, radr=0.999, dyna=0, verbose=False)
    iterations = 1
    total_reward = test(data, iterations, learner, verbose)
    print("results for", filename)
    print(iterations, "iterations with total_reward", total_reward)

    ######## run dyna test ########
    # initialize the learner
    learner = ql.QLearner(num_states=100, num_actions=4, alpha=0.2, gamma=0.9,
                          rar=0.5, radr=0.99, dyna=10, verbose=False)
    iterations = 1
    data = originalmap.copy()
    start = time.time()
    total_reward = test(data, iterations, learner, verbose)
    end = time.time()
    print('dyna iterations', end - start)
    print("results for", filename)
    print(iterations, "iterations with total_reward", total_reward)

    import cProfile
    # Use runctx rather than run: run() executes the string in __main__'s
    # namespace, where these local variables would not be visible.
    cProfile.runctx('test(data, iterations, learner, verbose)',
                    globals(), locals())
def test_code():
    robot_qlearning_testing_seed = 1490652871
    rand.seed(robot_qlearning_testing_seed)
    np.random.seed(robot_qlearning_testing_seed)

    verbose = True  # print lots of debug stuff if True

    # read in the map
    filename = 'testworlds/world01.csv'
    with open(filename) as inf:
        data = np.array(
            [list(map(float, s.strip().split(','))) for s in inf.readlines()])
    originalmap = data.copy()  # make a copy so we can revert to the original map later
    if verbose:
        printmap(data)

    rand.seed(5)

    ######## run non-dyna test ########
    # initialize the learner
    learner = ql.QLearner(num_states=100, num_actions=4, alpha=0.2, gamma=0.9,
                          rar=0.98, radr=0.999, dyna=0, verbose=False)
    epochs = 500
    total_reward = test(data, epochs, learner, verbose)
    print(epochs, "median total_reward", total_reward)
    print()
    non_dyna_score = total_reward

    ######## run dyna test ########
    # initialize the learner
    learner = ql.QLearner(num_states=100, num_actions=4, alpha=0.2, gamma=0.9,
                          rar=0.5, radr=0.99, dyna=200, verbose=False)
    epochs = 50
    data = originalmap.copy()
    total_reward = test(data, epochs, learner, verbose)
    print(epochs, "median total_reward", total_reward)
    dyna_score = total_reward

    print()
    print()
    print("results for", filename)
    print("non_dyna_score:", non_dyna_score)
    print("dyna_score :", dyna_score)
def __init__(self, impact=0.0, num_shares=1000, epochs=100, num_steps=10,
             commission=0.00, verbose=False, **kwargs):
    """Create a strategy learner (Q-learning based) that can learn a trading policy.

    Inputs / Parameters:
        impact: The amount the price moves against the trader compared to
            the historical data at each transaction
        num_shares: The number of shares that can be traded in one order
        epochs: The number of times to train the QLearner
        num_steps: The number of steps used in getting thresholds for the
            discretization process. It is the number of groups to put data into.
        commission: The fixed amount in dollars charged for each transaction
        verbose: If False, no plots. If True, print and plot data in add_evidence
        **kwargs: These are the arguments for QLearner
    """
    # Set constants for positions (which become our order signals)
    self.SHORT = -1.0
    self.CASH = 0.0
    self.LONG = 1.0
    self.epochs = epochs
    self.num_steps = num_steps
    self.num_shares = num_shares
    self.impact = impact
    self.commission = commission
    self.verbose = verbose
    # Initialize a QLearner for this Strategy Learner
    self.QLearner = ql.QLearner(**kwargs)
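# Usage sketch for the constructor above: a hypothetical example, assuming the
# enclosing class is named StrategyLearner (the class name is not shown here).
# All parameter values are illustrative assumptions, not from the original
# code; the trailing keywords are forwarded to ql.QLearner via **kwargs.
sl = StrategyLearner(impact=0.005, num_shares=1000, epochs=50, num_steps=10,
                     commission=9.95, verbose=True,
                     num_states=10 ** 5, num_actions=3)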
def testPolicy(self, symbol="IBM",
               sd=dt.datetime(2009, 1, 1),
               ed=dt.datetime(2010, 1, 1),
               sv=10000):
    syms = [symbol]
    dates = pd.date_range(sd, ed)

    # Call indicators function to receive indicators with dates before starting range
    normalAverage, bollBand, MACDInd, momentum, prices = plotIndicators(
        sd - dt.timedelta(days=80), ed, syms)

    # Get indicators for the given dates
    bollBand = bollBand.loc[prices.index >= sd]
    normalAverage = normalAverage.loc[prices.index >= sd]
    MACDInd = MACDInd.loc[prices.index >= sd]
    momentum = momentum.loc[prices.index >= sd]
    prices = prices.loc[prices.index >= sd]

    # Discretize values in the dataframes
    bollBand = self.dfDiscretize(bollBand, steps=self.bins)
    normalAverage = self.dfDiscretize(normalAverage, steps=self.bins)
    MACDInd = self.dfDiscretize(MACDInd, steps=self.bins)
    momentum = self.dfDiscretize(momentum, steps=self.bins)

    firstState = (bollBand.iloc[0][symbol], normalAverage.iloc[0][symbol],
                  MACDInd.iloc[0][symbol], momentum.iloc[0][symbol])
    dims = (self.bins, self.bins, self.bins, self.bins)
    firstState = np.ravel_multi_index(firstState, dims=dims)

    self.learner = ql.QLearner(num_states=self.bins ** 4, num_actions=5,
                               rar=0.98, verbose=False)
    action = self.learner.querysetstate(firstState)
    shares = 0
    orders = pd.DataFrame(columns=["Orders"], index=prices.index)
    # Positional .iloc[j, 0] assignment avoids pandas chained-assignment,
    # which can silently write to a copy.
    for j in range(len(prices.index)):
        # Buy allowed
        if action == 0 and shares in [0, -500]:
            shares += 500
            orders.iloc[j, 0] = 500
        elif action == 1 and shares == -500:
            shares += 1000
            orders.iloc[j, 0] = 1000
        # Sell allowed
        elif action == 2 and shares in [0, 500]:
            shares -= 500
            orders.iloc[j, 0] = -500
        elif action == 3 and shares == 500:
            shares -= 1000
            orders.iloc[j, 0] = -1000
        else:
            orders.iloc[j, 0] = 0
        nextState = np.ravel_multi_index(
            (bollBand.iloc[j][symbol], normalAverage.iloc[j][symbol],
             MACDInd.iloc[j][symbol], momentum.iloc[j][symbol]),
            dims=dims)
        action = self.learner.querysetstate(nextState)
    return orders
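# Minimal worked example of the state encoding used in testPolicy above:
# np.ravel_multi_index packs four discretized indicator values (each in
# [0, bins)) into a single integer state. The numbers here are illustrative.
import numpy as np

bins = 10
state = np.ravel_multi_index((3, 7, 0, 9), dims=(bins, bins, bins, bins))
assert state == 3 * 10 ** 3 + 7 * 10 ** 2 + 0 * 10 + 9  # 3709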
def main():
    if experiment_number == 1:
        parameters = Trade2Agents.Trade2Agents()
    elif experiment_number == 2:
        parameters = Trade3Agents.Trade3Agents()
    else:  # Default
        parameters = Trade2Agents.Trade2Agents()

    system = System.System(parameters)

    agents = []
    for g in parameters.agents:
        if parameters.agent_types[g] == "repnet":
            agents.append(RepNetAgent.RepNetAgent(g, system, parameters))
        elif parameters.agent_types[g] == "oracle":
            if experiment_number == 1:
                agents.append(Oracle2Agents.Oracle(g))
            elif experiment_number == 2:
                agents.append(Oracle3Agents.Oracle(g))
            else:  # Default
                agents.append(Oracle2Agents.Oracle(g))
        elif parameters.agent_types[g] == "mdp":
            agents.append(MDPAgent.MDPAgent(g, system, parameters))
        elif parameters.agent_types[g] == "qlearner":
            agents.append(QLearner.QLearner(g, system, parameters))

    repNetMDP = OnlineSolver.OnlineSolver(system, agents, parameters,
                                          experiment_number=experiment_number)
    repNetMDP.online_repnet_solver()
def __init__(self, verbose=False):
    self.verbose = verbose
    self.ql = ql.QLearner(num_states=3000, num_actions=3,
                          rar=0.5, radr=0.99, dyna=30)
def addEvidence(self, symbol="IBM",
                sd=dt.datetime(2008, 1, 1),
                ed=dt.datetime(2009, 12, 31),
                sv=10000):
    syms = [symbol]
    dates = pd.date_range(sd, ed)
    prices_all = ut.get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all[['SPY']]  # only SPY, for comparison later
    if self.verbose:
        print(prices)

    # NOTE: Implementing steps:
    # 1. Select & compute indicators (discretize)
    TSI, momentum, _, priceSMA = indicators(prices)
    TSI_SPY, _, _, _ = indicators(prices_SPY)

    # 2. Set up the learner.
    # NOTE: 0 do nothing, 1 long, 2 short; the exact number should be
    # processed with additional work.
    states_num, state_list = self.set_state([TSI_SPY, priceSMA])
    self.learner = QLearner.QLearner(num_states=states_num, num_actions=3)

    # 3. Loop until cumulative return is no longer improving.
    # Convergence criteria: policy changes? returns? DF differences? runtime exceeded?
    policy = np.zeros(prices.shape[0])
    policy_changes = policy.shape[0]
    loop_num = 40
    prices_SPY['label'] = np.zeros(prices.shape[0])
    for i in range(len(state_list)):
        prices_SPY['label'] += state_list[i] * (10 ** (len(state_list) - i - 1))

    while policy_changes > 0.05 * prices.shape[0] and loop_num > 1:
        # Day 0:
        policy_temp = policy.copy()
        self.learner.a = 0
        self.learner.s = int(prices_SPY['label'][0])
        holdings = 0
        # 4. For each day of the data:
        for i in range(1, prices.shape[0]):
            # 1. Compute the current state
            state = int(prices_SPY['label'][i])
            # 2. Compute the reward for the last state
            rew = holdings * float(prices.iloc[i] - prices.iloc[i - 1])
            # 3. Train the learner with the above data
            action = self.learner.query(state, rew)
            policy[i] = action
            # 4. Implement the action the learner returned
            if abs(holdings) < 2000:
                if action:
                    if action == 1 and holdings < 1000:
                        holdings += 1000
                        prices = prices * (1 + self.impact)
                    elif action == 2 and holdings > -1000:
                        holdings -= 1000
                        prices = prices * (1 - self.impact)
        # Calculate loop parameter changes. The original assigned this count
        # to a differently named variable (policy_change), so the while
        # condition never saw the update.
        loop_num -= 1
        policy_changes = policy[policy != policy_temp].shape[0]
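# Sketch of the base-10 state encoding that builds prices_SPY['label'] above,
# with assumed values. It relies on each per-indicator state being a single
# digit (< 10) so the digits do not collide.
state_list = [2, 5]
label = sum(s * 10 ** (len(state_list) - i - 1) for i, s in enumerate(state_list))
assert label == 25  # 2 * 10 + 5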
def __init__(self, num_shares=1000, epochs=100, num_steps=10, impact=0.0,
             commission=0.00, verbose=False, learner=None):
    """
    Instantiate a StrategyLearner that can learn a trading policy.

    num_shares: The number of shares that can be traded in one order
    epochs: The number of times to train the QLearner
    num_steps: The number of steps used in getting thresholds for the
        discretization process. It is the number of groups to put data into.
    impact: The amount the price moves against the trader compared to the
        historical data at each transaction
    commission: The fixed amount in dollars charged for each transaction
    verbose: If True, print and plot data in add_evidence
    learner: The QLearner to use; if None, a new one is created per instance
    """
    self.epochs = epochs
    self.num_steps = num_steps
    self.num_shares = num_shares
    self.impact = impact
    self.commission = commission
    self.verbose = verbose
    self.window_size = 10
    # Avoid a QLearner instance as a default argument: defaults are built
    # once at definition time and would be shared across all instances.
    self.q_learner = learner if learner is not None else ql.QLearner(
        num_states=3000, num_actions=3)
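# Hypothetical usage sketch (class name and argument values are assumptions,
# not from the original code): injecting an explicit learner instead of
# relying on the per-instance default.
custom = ql.QLearner(num_states=3000, num_actions=3, alpha=0.1, rar=0.7)
sl = StrategyLearner(num_shares=500, epochs=50, learner=custom)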
def testqlearner(iteration_num=500, dyna=0):
    verbose = False  # print lots of debug stuff if True

    # read in the map
    with open('testworlds/world03.csv') as inf:
        data = np.array(
            [list(map(float, s.strip().split(','))) for s in inf.readlines()])
    originalmap = data.copy()  # make a copy so we can revert to the original map later
    startpos = getrobotpos(data)  # find where the robot starts
    goalpos = getgoalpos(data)  # find where the goal is
    if verbose:
        printmap(data)

    rand.seed(5)

    # initialize the learner
    learner = ql.QLearner(num_states=100, num_actions=4, rar=0.98,
                          radr=0.9999, dyna=dyna, verbose=verbose)
    my_results = np.zeros(iteration_num)

    # each iteration involves one trip to the goal
    for iteration in range(0, iteration_num):
        steps = 0
        data = originalmap.copy()
        robopos = startpos
        state = discretize(robopos)  # convert the location to a state
        action = learner.querysetstate(state)  # set the state and get first action
        while robopos != goalpos:
            # move to new location according to action and then get a new action
            newpos = movebot(data, robopos, action)
            if newpos == goalpos:
                r = 1  # reward for reaching the goal
            else:
                r = -1  # negative reward for not being at the goal
            state = discretize(newpos)
            action = learner.query(state, r)
            data[robopos] = 4  # mark where we've been for map printing
            data[newpos] = 2  # move to new location
            robopos = newpos  # update the location
            if verbose:
                printmap(data)
            if verbose:
                time.sleep(1)
            steps += 1
        print(iteration, ",", steps)
        my_results[iteration] = steps
    printmap(data)
    return my_results
def __init__(self, verbose=False, impact=0.0, **kwargs):
    self.verbose = verbose
    self.impact = impact
    self.bbp_bins = None
    self.sma_bins = None
    self.momentum_bins = None
    self.nbins = 9
    self.learner = ql.QLearner(**kwargs)
def baseTester():
    '''Runs a somewhat comprehensive test.'''
    # The original wrapped this import in a bare try/except that silently
    # passed, which would leave ql undefined on failure; import it directly.
    import QLearner as ql

    # It is worth noting here that num_states can be 100 for any grid < 10x10
    # using the tuckerHash; we need a new hash algo if we are to use a grid
    # outside those parameters.
    baseKwargs = {'num_states': 100, 'alpha': 1.0, 'gamma': 0.9, 'rar': 0.5,
                  'radr': 0.99, 'dyna': 0, 'verbose': False}

    # If you want to add your own test, add it here. I use a tuple to indicate
    # one test; it is: (csv file, expected convergence iterations,
    # expected policy length, kwarg modifier, test name)
    myTestList = [
        ('testEasyWorld.csv', 800, 13, {}, 'easy test'),
        ('world01.csv', 7000, 16, {}, 'Tucker Test 1'),
        ('world02.csv', 7000, 17, {}, 'Tucker Test 2'),
        ('testGridWorld.csv', 5000, 20, {}, 'Leo Base Test'),
        ('testGridWorld.csv', 18000, 20, {'alpha': .2}, 'Test Learning Rate'),
        ('testEasyWorld.csv', 700, 13, {'rar': 0.05}, 'Test Exploration'),
        ('testEasyWorld.csv', 700, 13, {'radr': 0.8}, 'Test Exploration Decay'),
        ('testGridWorld.csv', 3000, 20, {'gamma': 0.8}, 'Test Discount Rate'),
        ('testGridWorld.csv', 1100, 20, {'dyna': 100}, 'Test Dyna'),
    ]

    fdtest = myTestList[7:9]
    # for test in myTestList:
    for test in fdtest:
        print('-------------------------------')
        print(test[4])
        world = GridWorld(test[0])
        testKwargs = copy(baseKwargs)
        testKwargs.update(test[3])
        print('parameters %s' % str(testKwargs))
        learner = ql.QLearner(**testKwargs)
        print(world.grid)
        myTester = QTester(world, learner)
        nIter = test[1]
        totalIter = nIter
        lastPolicyLength = 0
        # someone let me know if there's a better way to check for convergence
        while totalIter < (test[1] * 1.4):
            myTester.nIter(nIter)
            nIter = int(.05 * test[1])
            myPolicy = myTester.getPolicy()
            policyLength = len(myPolicy)
            totalIter += nIter
            if (lastPolicyLength == policyLength) and (policyLength < 100):
                print('converged in approx %i iterations' % totalIter)
                print(policyLength, myPolicy, test[2])
                break
            lastPolicyLength = policyLength
        if (test[1] * 1.2 >= totalIter) and (policyLength == test[2]):
            print('*** TEST PASSED ***')
        else:
            print('xxx TEST FAILED xxx')
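# The loop above checks convergence by comparing successive policy lengths,
# which the author flags as ad hoc. One possible alternative (a sketch, not
# part of the original code): require the policy itself, not just its length,
# to be unchanged across the last k checks.
def policy_converged(policy_history, k=3):
    if len(policy_history) < k:
        return False
    last = policy_history[-1]
    return all(p == last for p in policy_history[-k:])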
def test_code():
    verbose = False

    filename = 'testworlds/world01.csv'
    with open(filename) as inf:
        lines = inf.readlines()
    data = np.array([list(map(float, s.strip().split(','))) for s in lines])
    originalmap = data.copy()  # make copy to revert to original later
    if verbose:
        printmap(data)

    rand.seed(5)

    # run non-dyna test (the original comment mislabeled this as the dyna test)
    learner = ql.QLearner(num_states=100, num_actions=4, alpha=0.2, gamma=0.9,
                          rar=0.98, radr=0.999, dyna=0, verbose=False)
    epochs = 500
    total_reward = test(data, epochs, learner, verbose)
    print(f"{epochs}, median total_reward {total_reward}")
    print()
    non_dyna_score = total_reward

    # run dyna test
    learner = ql.QLearner(num_states=100, num_actions=4, alpha=0.2, gamma=0.9,
                          rar=0.5, radr=0.99, dyna=200, verbose=False)
    epochs = 50
    data = originalmap.copy()
    total_reward = test(data, epochs, learner, verbose)
    print(f"{epochs}, median total_reward {total_reward}")
    dyna_score = total_reward

    print()
    print()
    print(f"results for {filename}")
    print(f"non_dyna_score: {non_dyna_score}")
    print(f"dyna_score : {dyna_score}")
def __init__(self, verbose=False, impact=0.0, commission=0.0,
             numLong=1000.0, numShort=1000.0):
    self.verbose = verbose
    self.impact = impact
    self.commission = commission
    self.bins = None
    self.ql = ql.QLearner(num_states=10000, num_actions=3, alpha=0.2,
                          gamma=0.9, rar=0.5, radr=0.99, dyna=0,
                          verbose=False)
    self.features = None
    self.numLong = numLong
    self.numShort = numShort
def __init__(self, verbose=False):
    self.verbose = verbose
    self.Q = QLearner.QLearner(num_states=10000, num_actions=3, alpha=0.2,
                               gamma=0.9, rar=0.5, radr=0.99, dyna=50)
def __init__(self, verbose=False):
    self.verbose = verbose
    self.thresholds = np.zeros((STEPS, FEATURE_CNT))
    self.sv = 100000
    self.ql = ql.QLearner(num_states=3000, num_actions=3,
                          rar=0.5, radr=0.99, dyna=40)
def __init__(self, verbose=False):
    self.verbose = verbose
    self.ql = ql.QLearner(num_states=30000, num_actions=3, dyna=0,
                          rar=0.8, radr=0.99)
    self.momentumWindow = 19
    self.rollingWindow = 20
def __init__(self, environment):
    self.env = environment
    self.qlearner = ql.QLearner(num_states=self.env.observation_space.n,
                                num_actions=4, dyna=0, verbose=False,
                                rar=0.9, radr=0.9)
    self.qlearner.alpha = 0.1
def __init__(self, verbose=False):
    self.verbose = verbose
    self.ql = ql.QLearner(num_states=int(1e6), num_actions=3, alpha=0.1,
                          gamma=0.9, rar=0.5, radr=0.9, dyna=0,
                          verbose=False)
def __init__(self, verbose=False):
    self.verbose = verbose
    # initialize the learner
    self.learner = ql.QLearner(num_states=1000, num_actions=3, alpha=0.5,
                               gamma=0.9, rar=0.5, radr=0.999, dyna=0,
                               verbose=False)
def addEvidence(self, symbol="JPM",
                sd=dt.datetime(2008, 1, 1),
                ed=dt.datetime(2009, 1, 1),
                sv=10000):
    # add your code to do learning here
    indicators = compute_indicators(symbols=['JPM'], sd=sd, ed=ed,
                                    lookback=14, gen_plot=False)
    states = self.discreticize(indicators)
    states = states.astype(int)
    print("max_state", pd.to_numeric(states, downcast='integer'))
    num_states = states.max() + 1

    learner = ql.QLearner(num_states=num_states, num_actions=3, alpha=0.2,
                          gamma=0.9, rar=0.98, radr=0.999, dyna=0,
                          verbose=False)

    # set initial state on first day and get the first action
    action = learner.querysetstate(states[0])
    print(action)

    # Calculate daily returns
    prices = ut.get_data([symbol], pd.date_range(sd, ed))
    daily_returns = ((prices / prices.shift(1)) - 1) * 100
    # print(daily_returns[symbol])
    # print(state)

    # Series.iteritems() was removed in recent pandas; items() is equivalent.
    for index, state in states[1:].items():
        reward = daily_returns.loc[index, symbol]
        action = learner.query(state, reward)
        print(reward, action)

    # example usage of the old backward compatible util function
    syms = [symbol]
    dates = pd.date_range(sd, ed)
    prices_all = ut.get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later
    if self.verbose:
        print(prices)

    # example use with new colname
    volume_all = ut.get_data(syms, dates, colname="Volume")  # automatically adds SPY
    volume = volume_all[syms]  # only portfolio symbols
    volume_SPY = volume_all['SPY']  # only SPY, for comparison later
    if self.verbose:
        print(volume)
def __init__(self, verbose=False, impact=0.0):
    self.verbose = verbose
    self.impact = impact
    self.ql = ql.QLearner(num_states=1000, num_actions=3, alpha=0.2,
                          gamma=0.9, rar=0.5, radr=0.99, dyna=0,
                          verbose=False)
def __init__(self, verbose=False):
    self.verbose = verbose
    # initialize the QLearner
    self.ql = ql.QLearner(num_states=288, num_actions=3, alpha=0.2,
                          gamma=0.9, rar=0.99, radr=0.99, dyna=200,
                          verbose=False)
def testNoPerturbShort(self):
    g = Grid.Grid()
    l = QLearner.QLearner()
    e = Experiment(l, g, ((9, 2), (2, 9)), 1000, 2000)
    self.assertEqual(e.walker, l)
    self.assertEqual(e.grid, g)
    self.assertEqual(e.rewards, ((9, 2), (2, 9)))
    self.assertEqual(e.switch, 1000)
    self.assertEqual(e.stop, 2000)
    r = e.run()
    self.assertTrue(r > 0)
def __init__(self, verbose=False, impact=0.0):
    self.verbose = verbose
    self.impact = impact
    self.qlearner = ql.QLearner(num_states=1000, num_actions=3, alpha=0.2,
                                gamma=0.9, rar=0.5, radr=0.99, dyna=0,
                                verbose=False)
def testNoPerturbShort(self):
    g = Grid.Grid()
    l = QLearner.QLearner()
    e = Experiments(l, g, 1000, 2000, 5)
    self.assertEqual(e.walker, l)
    self.assertEqual(e.grid, g)
    self.assertEqual(e.switch, 1000)
    self.assertEqual(e.stop, 2000)
    self.assertEqual(e.repeats, 5)
    r = e.run()
    self.assertEqual(len(r), len(EXPERIMENTS))