def main():
    env = gym.make("Checkers")

    a1 = RandomAgentLight()
    a2 = RandomAgentDark()

    obs = env.reset()
    current_agent = a1
    next_agent = a2

    while True:
        from_row, from_col, to_row, to_col = current_agent.act(obs)
        obs, rew, done, info = env.step(current_agent, from_row, from_col, to_row, to_col)
        current_agent.consume(obs, rew, done)

        if done:
            print(f"Game over! {current_agent} agent wins.")
            obs = env.reset()

        # switch agents
        temporary_agent = current_agent
        current_agent = next_agent
        next_agent = temporary_agent

    env.close()
def main():
    env = gym.make("Checkers")

    a1 = RandomAgentLight("Agent 1")
    a2 = RandomAgentDark("Agent 2")

    obs = env.reset()
    current_agent = a1
    next_agent = a2
    rew = 0
    done = False

    while True:
        from_row, from_col, to_row, to_col = current_agent.act(obs, rew, done)
        try:
            obs, rew, done, info = env.step(current_agent, from_row, from_col, to_row, to_col)
        except ValueError:
            print(f"Invalid move by {current_agent} agent.")
            break

        env.render()

        if done:
            # check for a win before switching, so the agent that just moved is reported
            print(f"Game over! {current_agent} agent wins.")
            obs = env.reset()

        # switch agents
        temporary_agent = current_agent
        current_agent = next_agent
        next_agent = temporary_agent

    env.close()
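# A minimal sketch of the agent interface these Checkers examples rely on,
# inferred from how the loops above call act() and consume(). The class name
# and the purely random move selection are illustrative assumptions; the real
# RandomAgentLight / RandomAgentDark live in seoulai_gym.envs.checkers.agents
# (see the imports in the DQN example below) and only pick from valid moves.
import random


class SketchRandomAgent:
    def __init__(self, name="Sketch"):
        self._name = name

    def act(self, obs, reward=0, done=False):
        # Return a (from_row, from_col, to_row, to_col) move on an 8x8 board.
        from_row, from_col = random.randrange(8), random.randrange(8)
        to_row, to_col = random.randrange(8), random.randrange(8)
        return from_row, from_col, to_row, to_col

    def consume(self, obs, reward, done):
        # Random agents ignore feedback; a learning agent would update here.
        pass

    def __str__(self):
        return self._name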
def main():
    # make the Market environment
    # TODO: add the trading conditions of real exchanges,
    # so users can choose an exchange, e.g. gym.make("Market", exchange_name)
    env = gym.make("Market")

    # select exchange
    env.select("upbit")

    init_cash = 100000000  # KRW
    a1 = RandomAgentBuffett("Buffett", init_cash)

    current_agent = a1
    obs = env.reset()
    rew = 0  # reward
    done = False

    print(
        "tick\t\t decision\t\t trad_price(ccld_price)\t\t"
        + "trad_qty(ccld_qty)\t\t fee\t\t cash\t\t asset_qty\t\t"
        + "asset_val\t\t portfolio_val\t\t 1tick_return\t\t 1tick_ret_ratio\t\t "
    )

    i = 0
    while True:
        decision, trad_price, trad_qty = current_agent.act(obs, rew, done)
        try:
            obs, rew, done, info = env.step(current_agent, decision, trad_price, trad_qty)
            # data sheet
            print("%5d %4s %10lf %10lf %10lf %10lf %10lf %10lf %10lf %10lf" % (
                i, decision, trad_price, trad_qty, info["fee"],
                current_agent.cash, current_agent.asset_qty, current_agent.asset_val,
                info["1tick_return"], info["1tick_ret_ratio"]))
        except ValueError:
            break

        env.render(current_agent.cash + current_agent.asset_val, decision)

        if done:
            wallet = current_agent.cash + current_agent.asset_val
            diff = wallet - init_cash
            print("game over!!! " + info["msg"])
            print(
                "total result. Agent wallet: %f, Agent total_return: %f, Agent total_ret_ratio: %f"
                % (wallet, diff, ((wallet / init_cash) - 1) * 100))
            obs = env.reset()
            break

        i = i + 1

    env.close()
def main():
    # gym environment name
    env = gym.make("Mighty")

    # player name, uid
    players = [
        RandomAgent("Agent 1", 0),
        RandomAgent("Agent 2", 1),
        RandomAgent("Agent 3", 2),
        RandomAgent("Agent 4", 3),
        RandomAgent("Agent 5", 4)
    ]

    # initialize the environment
    obs = env.reset()

    # register the players
    obs['game'].players = [
        players[0]._name,
        players[1]._name,
        players[2]._name,
        players[3]._name,
        players[4]._name
    ]

    turn = 0
    reward = 0
    done = False
    num_of_game = 10  # number of games to play

    while True:
        act = players[turn].act(obs, reward, done)
        print('\t %s' % (act), end=':')
        print(obs['board'].PLAYER_CARDS[turn])

        obs, rew, done, info = env.step(players[turn], act)

        # switch agents
        if 'turn' in info:
            turn = info['turn']
        else:
            turn = (turn + 1) % 5

        env.render()

        if done:
            num_of_game -= 1
            if num_of_game == 0:
                break
            obs = env.reset()

    input('end play')
    env.close()
4. You must return a dictionary of actions.
"""
your_actions = dict(
    holding=0,
    buy_20per=(+20, "%"),
    sell_20per=(-20, "%"),
)

a1 = MeanRevertingAgent(
    your_id,
    your_actions,
)

env = gym.make("Market")
env.participate(your_id, mode)
obs = env.reset()

for t in count():    # Online RL
    print("step {0}".format(t))

    action = a1.act(obs)    # Local function
    next_obs, rewards, done, _ = env.step(**action)
    a1.postprocess(obs, action, next_obs, rewards)

    print("ACTION", action)
    print("REWARDS", rewards)

    if done:
        break

    obs = next_obs  # advance to the next observation for the next step
def main():
    env = gym2.make("Checkers")

    a1 = AgentLight()
    a2 = AgentDark()
    a3 = RandomAgentDark()
    a4 = RandomAgentLight()
    # agent1 = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, env=env,
    #                batch_size=64, layer1_size=400, layer2_size=300, n_actions=2,
    #                chkpt_dir='tmp/ddpg_final1')

    # Do you want to activate the human model?
    # If False, the agent is 100% random; the human is 50% DP / 50% random.
    humanflag = True

    obs = env.reset()
    current_agent = a1  # robot agent
    next_agent = a2
    # next_agent = a4  # just moving a single piece

    # end-of-game conditions
    Whitedone = False
    Blackdone = False
    score1 = 0
    score2 = 0

    # x = eval(input("What is the goal x position coordinate?4"))
    # y = eval(input("What is the goal y position coordinate?1"))
    #
    # x1 = eval(input("What is the goal x position coordinate?4"))
    # y2 = eval(input("What is the goal y position coordinate?1"))

    while True:
        # required for the agents' goals in DP
        if current_agent == a1:
            flag = True
        elif current_agent == a4:
            flag = True
        elif current_agent == a2:
            flag = False
        elif current_agent == a3:
            flag = False

        # dynamic vs random agents
        if current_agent == a1:
            from_row, from_col, to_row, to_col = current_agent.act(obs, flag, humanflag)
        elif current_agent == a2:
            from_row, from_col, to_row, to_col = current_agent.act(obs, flag, humanflag)
        elif current_agent == a3:
            from_row, from_col, to_row, to_col = current_agent.act(obs)
        elif current_agent == a4:
            from_row, from_col, to_row, to_col = current_agent.act(obs)
        else:
            print("error choosing agents")

        obs, rew, done, info = env.step(current_agent, from_row, from_col, to_row, to_col)
        current_agent.consume(obs, rew, done)
        env.render()
        time.sleep(5)  # time delay

        if current_agent == a1:
            score1 += rew
            print(f"Reward:{rew}, total rewards: {score1} by: {current_agent}")
        elif current_agent == a4:
            score1 += rew
            print(f"Reward:{rew}, total rewards: {score1} by: {current_agent}")
        elif current_agent == a2:
            score2 += rew
            print(f"Reward:{rew}, total rewards: {score2} by: {current_agent}")
        elif current_agent == a3:
            score2 += rew
            print(f"Reward:{rew}, total rewards: {score2} by: {current_agent}")

        print(from_row, from_col, "status")

        # stopping conditions for the switch if a piece reached its goal
        if from_row == 5 and from_col == 2:
            Whitedone = True
        elif from_row == 5 and from_col == 3:
            Blackdone = True

        if done:
            print(f"Game over! {current_agent} agent wins.")
            # obs = env.reset()
        elif Whitedone == True and Blackdone == True:
            print(f"Game over! {current_agent} agent wins.")
            # obs = env.reset()
            env.close()

        # switch agents
        if humanflag == True:
            decision = random.choice([0, 1])
            if decision < 0.5:
                # both are DP
                temporary_agent = current_agent
                current_agent = next_agent
                next_agent = temporary_agent
                print("both DP")
                if Whitedone == True:
                    print("white dyn trig")
                    current_agent = a2
                    next_agent = a3
                elif Blackdone == True:
                    print("black dyn trig")
                    current_agent = a1
                    next_agent = a4
            else:
                # robot DP & human random
                temporary_agent = current_agent
                current_agent = next_agent
                next_agent = temporary_agent
                if temporary_agent == a1:
                    current_agent = a3
                elif temporary_agent == a2:
                    current_agent = a4
                elif temporary_agent == a3:
                    current_agent = a1
                elif temporary_agent == a4:
                    current_agent = a2
                if Whitedone == True:
                    print("white ran trig")
                    current_agent = a3
                elif Blackdone == True:
                    print("black ran trig")
                    current_agent = a4
                print("rand and DP")
        else:
            temporary_agent = current_agent
            current_agent = next_agent
            next_agent = temporary_agent

    env.close()
import seoulai_gym as gym
import tensorflow as tf

from seoulai_gym.envs.checkers.utils import board_list2numpy
from seoulai_gym.envs.checkers.agents import RandomAgentLight
from seoulai_gym.envs.checkers.base import Constants

from agent import DqnAgent

env = gym.make("Checkers")

####### GAME SETTING #######
checkers_height = 8
checkers_width = 8

####### H PARAMS #######
dis = 0.99

####### Model Restore, Save #######
save_file = './train_model.ckpt'


def main():
    max_episodes = 10000

    with tf.Session() as sess:
        # saver = tf.train.Saver()

        ####### Agent Setting #######
        MasterAgent = RandomAgentLight("Teacher Agent")
        MyAgent = DqnAgent(sess, "doublejtoh Agent", Constants().LIGHT)

        tf.global_variables_initializer().run()
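        # A minimal sketch of one possible episode loop, assuming DqnAgent
        # exposes the same act()/consume() interface as the random agents in
        # the other Checkers examples above. DqnAgent's actual replay buffer,
        # target network, and checkpointing logic live in agent.py and are not
        # shown here, so this loop is illustrative rather than the project's
        # real training procedure.
        for episode in range(max_episodes):
            obs = env.reset()
            done = False
            current_agent, next_agent = MyAgent, MasterAgent
            while not done:
                from_row, from_col, to_row, to_col = current_agent.act(obs)
                obs, rew, done, info = env.step(current_agent,
                                                from_row, from_col, to_row, to_col)
                current_agent.consume(obs, rew, done)
                # alternate between the learning agent and the teacher agent
                current_agent, next_agent = next_agent, current_agent
        env.close()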
def main():
    env = gym2.make("Checkers")

    # robot agents
    a1 = AgentLight()
    # a2 = AgentDark()
    # human agents
    a3 = HumanLight()
    # a4 = HumanDark()
    # random agents
    a5 = RandomAgentLight()
    # a6 = RandomAgentDark()

    obs = env.reset()

    # classify the board
    whiteplayers = 0
    blackplayers = 0
    emptycells = 0
    size = len(obs)
    for i in range(size):
        for j in range(size):
            if obs[i][j] is None:
                emptycells += 1
            elif obs[i][j].ptype == 1:
                whiteplayers += 1
            elif obs[i][j].ptype == 2:
                blackplayers += 1
    print("white players:", whiteplayers)
    print("black players:", blackplayers)
    print("amount of empty cells in 8x8 board:", emptycells)

    # activates DP robots and humans
    # Do you want to activate the human model?
    # If False, the agent is 100% random; the human is 50% DP / 50% random.
    robotflag = True
    humanflag = True
    if robotflag == True:
        print("Robot DP Activated")
    else:
        print("Robot DP Deactivated")
    if humanflag == True:
        print("Human Model Activated")
    else:
        print("Human Model Deactivated")

    robots = 1
    humans = 1
    nplayers = 64 - emptycells  # change if the board size changes
    print("nplayers in the game:", nplayers)

    # creates and requests the list of objectives
    glist = []
    for i in range(0, nplayers):
        goal = input("type (x,y) 6365 objective in xy format for each goal and press enter:")
        glist.append(goal)
    # glist = ('63', '65')
    glist = tuple(glist)
    print(glist)

    # current_agent = a1  # robot agent
    # next_agent = a2
    # next_agent = a4  # just moving a single piece
    # agent1 = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, env=env,
    #                batch_size=64, layer1_size=400, layer2_size=300, n_actions=2,
    #                chkpt_dir='tmp/ddpg_final1')

    # end-of-game conditions
    Whitedone = False
    Blackdone = False
    # TODO: create one score per player (nplayers of them)
    score1 = 0
    score2 = 0
    counter = 0

    # begin with the robot first and the human second
    current_agent = a1
    next_agent = a3

    while True:
        # # required for the agents' goals in DP
        # if current_agent == a1:
        #     flag = True
        # elif current_agent == a3:
        #     flag = False
        # elif current_agent == a5:
        #     flag = False

        from_row, from_col, to_row, to_col, flag = current_agent.act(obs, glist)
        obs, rew, done, info = env.step(current_agent, from_row, from_col, to_row, to_col)
        current_agent.consume(obs, rew, done)
        print(flag, "flag turn")
        env.render()
        time.sleep(3)  # time delay
        counter += 1

        # valid_moves = Rules.generate_valid_moves(obs, ptype, board_size)
        # reward = 0
        # # identifies which piece we are moving with flag
        # decision = list(valid_moves.keys())
        # if (dyn_from_row, dyn_from_col) == decision[0]:
        #     reward = True   # objective 1
        # elif (dyn_from_row, dyn_from_col) == decision[1]:
        #     reward = False  # objective 2

        if current_agent == a1:
            score1 += rew
            print(f"Reward:{rew}, total rewards: {score1} by: {current_agent}")
        # elif current_agent == a4:
        #     score1 += rew
        #     print(f"Reward:{rew}, total rewards: {score1} by: {current_agent}")
        # elif current_agent == a2:
        #     score2 += rew
        #     print(f"Reward:{rew}, total rewards: {score2} by: {current_agent}")
        elif current_agent == a3:
            score2 += rew
            print(f"Reward:{rew}, total rewards: {score2} by: {current_agent}")

        # print(from_row, from_col, "status")
        # stopping conditions for the switch if a piece reached its goal
        print("objective location:", from_row, from_col, "counter:", counter)
        if from_row == 6 and from_col == 3:
            print("white is done")
            Whitedone = True
        elif from_row == 6 and from_col == 5:
            print("black is done")
            Blackdone = True

        if done:
            print(f"Game over! {current_agent} agent wins.")
            # obs = env.reset()
        elif Whitedone == True and Blackdone == True:
            print(f"Game over! {current_agent} agent wins.")
            # obs = env.reset()
            env.close()

        # switch agents
        # sequence of agents based on inputs
        # alternate between humans and robots
        if robotflag == True and humanflag == True:
            turns = nplayers
            if counter % turns == 0:
                # human (even turn): select any piece of the board at random
                temporary_agent = current_agent
                current_agent = next_agent
                next_agent = temporary_agent
                if Whitedone == True:
                    print("white is done agents")
                    current_agent = a3
                elif Blackdone == True:
                    print("black is done agents")
                    current_agent = a1
            else:
                # robot (odd turn)
                current_agent = a1
                next_agent = a3
        # alternate between humans
        elif robotflag == False and humanflag == True:
            turns = nplayers
            current_agent = 0
            next_agent = 0
        # alternate between random agents
        elif robotflag == False and humanflag == False:
            temporary_agent = current_agent
            current_agent = next_agent
            next_agent = temporary_agent

    env.close()