def main():
    env = gym.make("Checkers")

    a1 = RandomAgentLight()
    a2 = RandomAgentDark()

    obs = env.reset()
    current_agent = a1
    next_agent = a2

    while True:
        from_row, from_col, to_row, to_col = current_agent.act(obs)
        obs, rew, done, info = env.step(current_agent, from_row, from_col, to_row, to_col)
        current_agent.consume(obs, rew, done)

        if done:
            print(f"Game over! {current_agent} agent wins.")
            obs = env.reset()

        # switch agents
        temporary_agent = current_agent
        current_agent = next_agent
        next_agent = temporary_agent

    env.close()
def main():
    env = gym.make("Checkers")

    a1 = RandomAgentLight("Agent 1")
    a2 = RandomAgentDark("Agent 2")

    obs = env.reset()
    current_agent = a1
    next_agent = a2
    rew = 0
    done = False

    while True:
        from_row, from_col, to_row, to_col = current_agent.act(obs, rew, done)
        try:
            obs, rew, done, info = env.step(current_agent, from_row, from_col, to_row, to_col)
        except ValueError:
            print(f"Invalid move by {current_agent} agent.")
            break

        env.render()

        if done:
            # check for a win before switching, so the agent that just moved is reported
            print(f"Game over! {current_agent} agent wins.")
            obs = env.reset()

        # switch agents
        temporary_agent = current_agent
        current_agent = next_agent
        next_agent = temporary_agent

    env.close()
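# A minimal sketch of the agent interface these Checkers examples rely on,
# inferred from how the loops above call act() and consume(). The class name
# and the purely random move selection are illustrative assumptions; the real
# RandomAgentLight / RandomAgentDark live in seoulai_gym.envs.checkers.agents
# (see the imports in the DQN example below) and only pick from valid moves.
import random


class SketchRandomAgent:
    def __init__(self, name="Sketch"):
        self._name = name

    def act(self, obs, reward=0, done=False):
        # Return a (from_row, from_col, to_row, to_col) move on an 8x8 board.
        from_row, from_col = random.randrange(8), random.randrange(8)
        to_row, to_col = random.randrange(8), random.randrange(8)
        return from_row, from_col, to_row, to_col

    def consume(self, obs, reward, done):
        # Random agents ignore feedback; a learning agent would update here.
        pass

    def __str__(self):
        return self._name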
def main():
    # make the Market environment
    # TODO: add the trading conditions of real exchanges,
    # so users can choose an exchange, e.g. gym.make("Market", exchange_name)
    env = gym.make("Market")

    # select exchange
    env.select("upbit")

    init_cash = 100000000  # KRW
    a1 = RandomAgentBuffett("Buffett", init_cash)

    current_agent = a1
    obs = env.reset()
    rew = 0  # reward
    done = False

    print(
        "tick\t\t decision\t\t trad_price(ccld_price)\t\t"
        + "trad_qty(ccld_qty)\t\t fee\t\t cash\t\t asset_qty\t\t"
        + "asset_val\t\t portfolio_val\t\t 1tick_return\t\t 1tick_ret_ratio\t\t "
    )

    i = 0
    while True:
        decision, trad_price, trad_qty = current_agent.act(obs, rew, done)
        try:
            obs, rew, done, info = env.step(current_agent, decision, trad_price, trad_qty)
            # data sheet
            print("%5d %4s %10lf %10lf %10lf %10lf %10lf %10lf %10lf %10lf" % (
                i, decision, trad_price, trad_qty, info["fee"],
                current_agent.cash, current_agent.asset_qty, current_agent.asset_val,
                info["1tick_return"], info["1tick_ret_ratio"]))
        except ValueError:
            break

        env.render(current_agent.cash + current_agent.asset_val, decision)

        if done:
            wallet = current_agent.cash + current_agent.asset_val
            diff = wallet - init_cash
            print("game over!!! " + info["msg"])
            print(
                "total result. Agent wallet: %f, Agent total_return: %f, Agent total_ret_ratio: %f"
                % (wallet, diff, ((wallet / init_cash) - 1) * 100))
            obs = env.reset()
            break

        i = i + 1

    env.close()
def main():
    # gym environment name
    env = gym.make("Mighty")

    # player name, uid
    players = [
        RandomAgent("Agent 1", 0),
        RandomAgent("Agent 2", 1),
        RandomAgent("Agent 3", 2),
        RandomAgent("Agent 4", 3),
        RandomAgent("Agent 5", 4)
    ]

    # initialize the environment
    obs = env.reset()

    # register the players
    obs['game'].players = [
        players[0]._name,
        players[1]._name,
        players[2]._name,
        players[3]._name,
        players[4]._name
    ]

    turn = 0
    reward = 0
    done = False
    num_of_game = 10  # number of games to play

    while True:
        act = players[turn].act(obs, reward, done)
        print('\t %s' % (act), end=':')
        print(obs['board'].PLAYER_CARDS[turn])

        obs, rew, done, info = env.step(players[turn], act)

        # switch agents
        if 'turn' in info:
            turn = info['turn']
        else:
            turn = (turn + 1) % 5

        env.render()

        if done:
            num_of_game -= 1
            if num_of_game == 0:
                break
            obs = env.reset()

    input('end play')
    env.close()
4. You must return a dictionary of actions.
"""
your_actions = dict(
    holding=0,
    buy_20per=(+20, "%"),
    sell_20per=(-20, "%"),
)

a1 = MeanRevertingAgent(
    your_id,
    your_actions,
)

env = gym.make("Market")
env.participate(your_id, mode)
obs = env.reset()

for t in count():    # Online RL
    print("step {0}".format(t))

    action = a1.act(obs)    # Local function
    next_obs, rewards, done, _ = env.step(**action)
    a1.postprocess(obs, action, next_obs, rewards)

    print("ACTION", action)
    print("REWARDS", rewards)

    if done:
        break

    obs = next_obs  # advance to the next observation for the next step
def main():
    env = gym2.make("Checkers")

    a1 = AgentLight()
    a2 = AgentDark()
    a3 = RandomAgentDark()
    a4 = RandomAgentLight()
    # agent1 = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, env=env,
    #                batch_size=64, layer1_size=400, layer2_size=300, n_actions=2,
    #                chkpt_dir='tmp/ddpg_final1')

    # Do you want to activate the human model?
    # If False, the agent is 100% random; the human is 50% DP / 50% random.
    humanflag = True

    obs = env.reset()
    current_agent = a1  # robot agent
    next_agent = a2
    # next_agent = a4  # just moving a single piece

    # end-of-game conditions
    Whitedone = False
    Blackdone = False
    score1 = 0
    score2 = 0

    # x = eval(input("What is the goal x position coordinate?4"))
    # y = eval(input("What is the goal y position coordinate?1"))
    #
    # x1 = eval(input("What is the goal x position coordinate?4"))
    # y2 = eval(input("What is the goal y position coordinate?1"))

    while True:
        # required for the agents' goals in DP
        if current_agent == a1:
            flag = True
        elif current_agent == a4:
            flag = True
        elif current_agent == a2:
            flag = False
        elif current_agent == a3:
            flag = False

        # dynamic vs random agents
        if current_agent == a1:
            from_row, from_col, to_row, to_col = current_agent.act(obs, flag, humanflag)
        elif current_agent == a2:
            from_row, from_col, to_row, to_col = current_agent.act(obs, flag, humanflag)
        elif current_agent == a3:
            from_row, from_col, to_row, to_col = current_agent.act(obs)
        elif current_agent == a4:
            from_row, from_col, to_row, to_col = current_agent.act(obs)
        else:
            print("error choosing agents")

        obs, rew, done, info = env.step(current_agent, from_row, from_col, to_row, to_col)
        current_agent.consume(obs, rew, done)
        env.render()
        time.sleep(5)  # time delay

        if current_agent == a1:
            score1 += rew
            print(f"Reward:{rew}, total rewards: {score1} by: {current_agent}")
        elif current_agent == a4:
            score1 += rew
            print(f"Reward:{rew}, total rewards: {score1} by: {current_agent}")
        elif current_agent == a2:
            score2 += rew
            print(f"Reward:{rew}, total rewards: {score2} by: {current_agent}")
        elif current_agent == a3:
            score2 += rew
            print(f"Reward:{rew}, total rewards: {score2} by: {current_agent}")

        print(from_row, from_col, "status")

        # stopping conditions for the switch if a piece reached its goal
        if from_row == 5 and from_col == 2:
            Whitedone = True
        elif from_row == 5 and from_col == 3:
            Blackdone = True

        if done:
            print(f"Game over! {current_agent} agent wins.")
            # obs = env.reset()
        elif Whitedone == True and Blackdone == True:
            print(f"Game over! {current_agent} agent wins.")
            # obs = env.reset()
            env.close()

        # switch agents
        if humanflag == True:
            decision = random.choice([0, 1])
            if decision < 0.5:
                # both are DP
                temporary_agent = current_agent
                current_agent = next_agent
                next_agent = temporary_agent
                print("both DP")
                if Whitedone == True:
                    print("white dyn trig")
                    current_agent = a2
                    next_agent = a3
                elif Blackdone == True:
                    print("black dyn trig")
                    current_agent = a1
                    next_agent = a4
            else:
                # robot DP & human random
                temporary_agent = current_agent
                current_agent = next_agent
                next_agent = temporary_agent
                if temporary_agent == a1:
                    current_agent = a3
                elif temporary_agent == a2:
                    current_agent = a4
                elif temporary_agent == a3:
                    current_agent = a1
                elif temporary_agent == a4:
                    current_agent = a2
                if Whitedone == True:
                    print("white ran trig")
                    current_agent = a3
                elif Blackdone == True:
                    print("black ran trig")
                    current_agent = a4
                print("rand and DP")
        else:
            temporary_agent = current_agent
            current_agent = next_agent
            next_agent = temporary_agent

    env.close()
import seoulai_gym as gym
import tensorflow as tf

from seoulai_gym.envs.checkers.utils import board_list2numpy
from seoulai_gym.envs.checkers.agents import RandomAgentLight
from seoulai_gym.envs.checkers.base import Constants

from agent import DqnAgent

env = gym.make("Checkers")

####### GAME SETTING #######
checkers_height = 8
checkers_width = 8

####### H PARAMS #######
dis = 0.99

####### Model Restore, Save #######
save_file = './train_model.ckpt'


def main():
    max_episodes = 10000

    with tf.Session() as sess:
        # saver = tf.train.Saver()

        ####### Agent Setting #######
        MasterAgent = RandomAgentLight("Teacher Agent")
        MyAgent = DqnAgent(sess, "doublejtoh Agent", Constants().LIGHT)

        tf.global_variables_initializer().run()
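        # A minimal sketch of one possible episode loop, assuming DqnAgent
        # exposes the same act()/consume() interface as the random agents in
        # the other Checkers examples above. DqnAgent's actual replay buffer,
        # target network, and checkpointing logic live in agent.py and are not
        # shown here, so this loop is illustrative rather than the project's
        # real training procedure.
        for episode in range(max_episodes):
            obs = env.reset()
            done = False
            current_agent, next_agent = MyAgent, MasterAgent
            while not done:
                from_row, from_col, to_row, to_col = current_agent.act(obs)
                obs, rew, done, info = env.step(current_agent,
                                                from_row, from_col, to_row, to_col)
                current_agent.consume(obs, rew, done)
                # alternate between the learning agent and the teacher agent
                current_agent, next_agent = next_agent, current_agent
        env.close()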
def main():
    env = gym2.make("Checkers")

    # robot agents
    a1 = AgentLight()
    # a2 = AgentDark()
    # human agents
    a3 = HumanLight()
    # a4 = HumanDark()
    # random agents
    a5 = RandomAgentLight()
    # a6 = RandomAgentDark()

    obs = env.reset()

    # classify the board
    whiteplayers = 0
    blackplayers = 0
    emptycells = 0
    size = len(obs)
    for i in range(size):
        for j in range(size):
            if obs[i][j] is None:
                emptycells += 1
            elif obs[i][j].ptype == 1:
                whiteplayers += 1
            elif obs[i][j].ptype == 2:
                blackplayers += 1
    print("white players:", whiteplayers)
    print("black players:", blackplayers)
    print("amount of empty cells in 8x8 board:", emptycells)

    # activates DP robots and humans
    # Do you want to activate the human model?
    # If False, the agent is 100% random; the human is 50% DP / 50% random.
    robotflag = True
    humanflag = True
    if robotflag == True:
        print("Robot DP Activated")
    else:
        print("Robot DP Deactivated")
    if humanflag == True:
        print("Human Model Activated")
    else:
        print("Human Model Deactivated")

    robots = 1
    humans = 1
    nplayers = 64 - emptycells  # change if the board size changes
    print("nplayers in the game:", nplayers)

    # creates and requests the list of objectives
    glist = []
    for i in range(0, nplayers):
        goal = input("type (x,y) 6365 objective in xy format for each goal and press enter:")
        glist.append(goal)
    # glist = ('63', '65')
    glist = tuple(glist)
    print(glist)

    # current_agent = a1  # robot agent
    # next_agent = a2
    # next_agent = a4  # just moving a single piece
    # agent1 = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, env=env,
    #                batch_size=64, layer1_size=400, layer2_size=300, n_actions=2,
    #                chkpt_dir='tmp/ddpg_final1')

    # end-of-game conditions
    Whitedone = False
    Blackdone = False
    # TODO: create one score per player (nplayers of them)
    score1 = 0
    score2 = 0
    counter = 0

    # begin with the robot first and the human second
    current_agent = a1
    next_agent = a3

    while True:
        # # required for the agents' goals in DP
        # if current_agent == a1:
        #     flag = True
        # elif current_agent == a3:
        #     flag = False
        # elif current_agent == a5:
        #     flag = False

        from_row, from_col, to_row, to_col, flag = current_agent.act(obs, glist)
        obs, rew, done, info = env.step(current_agent, from_row, from_col, to_row, to_col)
        current_agent.consume(obs, rew, done)
        print(flag, "flag turn")
        env.render()
        time.sleep(3)  # time delay
        counter += 1

        # valid_moves = Rules.generate_valid_moves(obs, ptype, board_size)
        # reward = 0
        # # identifies which piece we are moving with flag
        # decision = list(valid_moves.keys())
        # if (dyn_from_row, dyn_from_col) == decision[0]:
        #     reward = True   # objective 1
        # elif (dyn_from_row, dyn_from_col) == decision[1]:
        #     reward = False  # objective 2

        if current_agent == a1:
            score1 += rew
            print(f"Reward:{rew}, total rewards: {score1} by: {current_agent}")
        # elif current_agent == a4:
        #     score1 += rew
        #     print(f"Reward:{rew}, total rewards: {score1} by: {current_agent}")
        # elif current_agent == a2:
        #     score2 += rew
        #     print(f"Reward:{rew}, total rewards: {score2} by: {current_agent}")
        elif current_agent == a3:
            score2 += rew
            print(f"Reward:{rew}, total rewards: {score2} by: {current_agent}")

        # print(from_row, from_col, "status")
        # stopping conditions for the switch if a piece reached its goal
        print("objective location:", from_row, from_col, "counter:", counter)
        if from_row == 6 and from_col == 3:
            print("white is done")
            Whitedone = True
        elif from_row == 6 and from_col == 5:
            print("black is done")
            Blackdone = True

        if done:
            print(f"Game over! {current_agent} agent wins.")
            # obs = env.reset()
        elif Whitedone == True and Blackdone == True:
            print(f"Game over! {current_agent} agent wins.")
            # obs = env.reset()
            env.close()

        # switch agents
        # sequence of agents based on inputs
        # alternate between humans and robots
        if robotflag == True and humanflag == True:
            turns = nplayers
            if counter % turns == 0:
                # human (even turn): select any piece of the board at random
                temporary_agent = current_agent
                current_agent = next_agent
                next_agent = temporary_agent
                if Whitedone == True:
                    print("white is done agents")
                    current_agent = a3
                elif Blackdone == True:
                    print("black is done agents")
                    current_agent = a1
            else:
                # robot (odd turn)
                current_agent = a1
                next_agent = a3
        # alternate between humans
        elif robotflag == False and humanflag == True:
            turns = nplayers
            current_agent = 0
            next_agent = 0
        # alternate between random agents
        elif robotflag == False and humanflag == False:
            temporary_agent = current_agent
            current_agent = next_agent
            next_agent = temporary_agent

    env.close()