예제 #1
0
파일: train.py 프로젝트: anonNo2/rl_dealer
def main(_):
    """Train the RL dealer agent, periodically logging progress, saving
    checkpoints, and running evaluation.

    Args:
        _: unused positional argument (tf.app.run passes argv here).
    """
    FLAGS.agent = model(params=FLAGS)
    FLAGS.environment = get_env(FLAGS)
    FLAGS.act = action()

    # Training hyper-parameters, attached to FLAGS so helpers can read them.
    FLAGS.step_max = FLAGS.environment.data_len()
    FLAGS.train_freq = 40
    FLAGS.update_q_freq = 50
    FLAGS.gamma = 0.97
    FLAGS.show_log_freq = 5
    FLAGS.memory = []  # replay buffer; was Experience(FLAGS.memory_size)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # Create the directory used to save model checkpoints.
    os.makedirs(FLAGS.model_dir, exist_ok=True)
    start = time.time()

    with tf.Session() as sess:
        sess.run(init)
        # Renamed from `eval` to avoid shadowing the builtin.
        evaluator = evaluation(FLAGS, sess)
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt:
            print('Loading Model...')
            saver.restore(sess, ckpt.model_checkpoint_path)
        total_step = 1
        print('\t'.join(
            map(str, [
                "epoch", "epsilon", "total_step", "rewardPerEpoch", "profits",
                "lossPerBatch", "elapsed_time"
            ])))
        for epoch in range(FLAGS.epoch_num):
            avg_loss_per_batch, total_reward, total_step, profits = run_epch(
                FLAGS, sess, total_step)

            # Log, checkpoint and evaluate every show_log_freq epochs.
            if (epoch + 1) % FLAGS.show_log_freq == 0:
                elapsed_time = time.time() - start
                print('\t'.join(
                    map(str, [
                        epoch + 1, FLAGS.act.epsilon, total_step, total_reward,
                        profits, avg_loss_per_batch, elapsed_time
                    ])))
                start = time.time()

                # BUG FIX: the original concatenated '\model-' (a literal
                # backslash), producing broken checkpoint paths on
                # non-Windows systems; build the path portably instead.
                saver.save(
                    sess,
                    os.path.join(FLAGS.model_dir,
                                 'model-' + str(epoch + 1) + '.ckpt'))
                evaluator.eval()
예제 #2
0
def play_a_game(commentary=False):
    """Play one full game (learning agent as +1 vs. random agent as -1).

    Args:
        commentary: print per-move status when True.

    Returns:
        The winner: +1 or -1.
    """
    board = init_board()
    player = np.random.randint(2) * 2 - 1  # randomly pick who starts

    while not game_over(board) and not check_for_error(board):
        if commentary:
            print("lets go player ", player)

        dice = roll_dice()
        if commentary:
            print("rolled dices:", dice)

        # A double grants a second move with the same roll.
        n_moves = 2 if dice[0] == dice[1] else 1
        for turn_idx in range(n_moves):
            snapshot = np.copy(board)

            # Learning agent drives +1, the random agent drives -1.
            if player == 1:
                move = agent.action(snapshot, dice, player, turn_idx)
            elif player == -1:
                move = random_agent(snapshot, dice, player, turn_idx)

            # Apply each sub-move to the live board (no-op for empty move).
            for m in move:
                board = update_board(board, m, player)

            if commentary:
                print("move from player", player, ":")
                pretty_print(board)

        # Players alternate turns.
        player = -player

    # The loop exits on the loser's flag; the other player won.
    return -1 * player
예제 #3
0
def play_a_game(commentary=False, net=None):
    """Play one game (net-driven agent as +1 vs. random agent as -1) and
    apply a learning update to ``net`` when the game ends.

    Args:
        commentary: print per-move status when True.
        net: wrapper object holding ``torch_nn``, ``torch_nn_policy``,
            ``gamma`` and a scalar ``i``; mutated in place by the final update.

    Returns:
        The winner: +1 or -1.
    """
    board = init_board()  # initialize the board
    player = np.random.randint(2) * 2 - 1  # which player begins?
    net.i = 1  # NOTE(review): set once here and never advanced in this function — confirm intended
    # play on
    while not game_over(board) and not check_for_error(board):
        if commentary:
            print("lets go player ", player)
        # roll dice
        dice = roll_dice()
        if commentary:
            print("rolled dices:", dice)
        # make a move (2 moves if the same number appears on the dice)
        for i in range(1 + int(dice[0] == dice[1])):
            board_copy = np.copy(board)

            # make the move (agent vs agent):
            # move = agent.action(board_copy, dice, player, i)

            # if you're playing vs random agent:
            if player == 1:
                move = agent.action(board_copy, dice, player, i, net)
            elif player == -1:
                move = random_agent(board_copy, dice, player, i)

            # update the board
            if len(move) != 0:
                for m in move:
                    board = update_board(board, m, player)

            #  give status after every move:
            if commentary:
                print("move from player", player, ":")
                pretty_print(board)

        #  players take turns
        player = -player

    #  return the winner
    # print(board)
    # f_final = agent.getFeatures(board, player)
    # print(f_final)
    # print(agent.getValue(board, player))
    # TD error: outcome (-1 * player is the winner) minus the value estimate.
    # NOTE(review): features are taken from board_copy (the state before the
    # last sub-move), not the terminal board — confirm this is intended.
    delta = -1 * player - net.torch_nn.forward(agent.getFeatures(board_copy, player))
    net.torch_nn.backward(net.gamma, delta)
    # Policy-gradient step on the policy parameters, scaled by net.i and delta.
    net.torch_nn_policy.theta = net.torch_nn_policy.theta + net.torch_nn_policy.alpha_theta * net.i * delta * agent.getFeatures(board_copy, player)
    return -1 * player
예제 #4
0
def play_a_game(commentary=True):
    """Play one game in the flipped-board variant: the board is flipped every
    turn so the side to move always sees itself as player +1.

    Args:
        commentary: print per-move status when True.

    Returns:
        The winner: +1 or -1.
    """
    board = init_board()
    player = 1  # player 1 always opens in this variant

    while not game_over(board):  # error checking disabled in this variant
        if commentary:
            print("lets go player ", player)

        dice = roll_dice()
        if commentary:
            print("rolled dices:", dice)

        # A double lets the player move twice on the same roll.
        attempts = 2 if dice[0] == dice[1] else 1
        for attempt in range(attempts):
            scratch = np.copy(board)

            # Both sides move as "1" on their own (flipped) view of the board.
            if player == 1:
                move = agent.action(scratch, dice, 1, attempt)
            elif player == -1:
                move = random_agent(scratch, dice, attempt)

            # Apply each sub-move (empty move list is a no-op).
            for m in move:
                board = update_board(board, m)

            if commentary:
                print("move from player", player, ":")
                print("board:")
                # Show player -1 the board from their own perspective.
                pretty_print(board if player == 1 else flip_board(board))

        # Hand over the turn and re-orient the board for the next mover.
        player = -player
        board = flip_board(board)

    return -1 * player
예제 #5
0
                      "human":{"type":"BB8Agent", "spec":{"init_x":np.vstack([ 50.,20.0, 0., 0.])}}
                    }
    reaching_world_spec = {
        "friction": 0,
        "reaching_eps": 0.1,
        "agent_goal_lists":{
            "robot": [[70.0,20.0], [10, 40]],
            "human": [[10.0,20.0], [40, 70]],
        }
    }
    env_spec = {
        "world": {"type":"FlatReachingWorld", "spec":reaching_world_spec},
        "dt": 0.02,
        "agent_env_spec": agent_env_spec
    }
    evaluator = evaluator.Evaluator(agent_specs, env_spec)

    env = env.FlatEnv(env_spec, agents)
    dt, env_info, measurement_groups = env.reset()
    record = []
    print("Simulation progress:")
    for it in progressbar.progressbar(range(100)):
        actions = {}
        for agent in agents:
            # an action is dictionary which must contain a key "control"
            actions[agent.name] = agent.action(dt, measurement_groups[agent.name])
            #sensor data is grouped by agent
        dt, env_info, measurement_groups = env.step(actions)
        record.append((env_info,measurement_groups))

    evaluator.evaluate(record)
예제 #6
0
def _reset_eligibility_traces(model):
    """Zero the per-layer eligibility traces (Z_w1/Z_b1/Z_w2/Z_b2) on *model*.

    Mutates *model* in place; traces match each weight's shape and live on
    the model's device as float tensors, exactly as the original inline code did.
    """
    for w_name, z_name in (("w1", "Z_w1"), ("b1", "Z_b1"),
                           ("w2", "Z_w2"), ("b2", "Z_b2")):
        weight = getattr(model, w_name)
        setattr(
            model, z_name,
            torch.zeros(weight.size(), device=model.device, dtype=torch.float))


def play_a_game(modelPlayer,
                modelPlayerOne,
                modelPlayerOther,
                commentary=False,
                randomAgent=False):
    """Play one game between two learning models, or one model vs. a random agent.

    Args:
        modelPlayer: when randomAgent is True, selects which model plays as
            +1 (1 -> modelPlayerOne, otherwise modelPlayerOther).
        modelPlayerOne: model for player +1 (has w1/b1/w2/b2 weights and
            gameFinishedUpdate/dynaUpdate methods).
        modelPlayerOther: model for player -1 (same interface).
        commentary: print per-move status when True.
        randomAgent: play player -1 with the random agent instead of the
            flipped learning agent.

    Returns:
        The winner: +1 or -1.
    """
    board = init_board()  # initialize the board
    player = np.random.randint(2) * 2 - 1  # which player begins?

    # Fresh eligibility traces for both models at the start of every game.
    # (Replaces 8 duplicated torch.zeros assignments in the original.)
    _reset_eligibility_traces(modelPlayerOne)
    _reset_eligibility_traces(modelPlayerOther)

    # play on
    while not game_over(board) and not check_for_error(board):
        if commentary: print("lets go player ", player)

        # roll dice
        dice = roll_dice()
        if commentary: print("rolled dices:", dice)

        # make a move (2 moves if the same number appears on the dice)
        for i in range(1 + int(dice[0] == dice[1])):
            board_copy = np.copy(board)

            if randomAgent:
                # +1 is driven by the selected model; -1 plays randomly.
                if player == 1:
                    chosen = modelPlayerOne if modelPlayer == 1 else modelPlayerOther
                    move = agent.action(board_copy, dice, player, i, chosen)
                elif player == -1:
                    move = random_agent(board_copy, dice, player, i)
            else:
                # Model vs. model: -1 plays through the flipped agent.
                if player == 1:
                    move = agent.action(board_copy, dice, player, i,
                                        modelPlayerOne)
                elif player == -1:
                    move = flipped_agent.action(board_copy, dice, player, i,
                                                modelPlayerOther)

            # update the board
            if len(move) != 0:
                for m in move:
                    board = update_board(board, m, player)

            # give status after every move:
            if commentary:
                print("move from player", player, ":")
                pretty_print(board)

        # players take turns
        player = -player

    # Final learning updates for both models with the game outcome.
    winner = -1 * player
    modelPlayerOne.gameFinishedUpdate(winner)
    modelPlayerOne.dynaUpdate()
    modelPlayerOther.gameFinishedUpdate(winner)
    modelPlayerOther.dynaUpdate()
    return winner
예제 #7
0
def play_a_game(opponent, commentary=False):
    """Play one game against the given opponent type and persist the model.

    Loads the model from "randommodel.pickle", plays the game, applies a
    learning update when opponent == "agent", and saves the model back.

    Args:
        opponent: "agent" (self-play with learning), "human" (interactive
            console play), or "random" (vs. the random agent).
        commentary: print per-move status when True.

    Returns:
        The winner: +1 or -1.
    """
    board = init_board()  # initialize the board
    player = np.random.randint(2) * 2 - 1  # which player begins?
    y_old = 0
    y_old_p2 = 0
    firstMove = True
    firstMove_p2 = True
    # BUG FIX: the original never closed this file handle; a context manager
    # guarantees it. NOTE: unpickling is only acceptable here because the
    # file is produced locally by this same program — never load an
    # untrusted pickle.
    with open("randommodel.pickle", "rb") as pickle_in:
        model = pickle.load(pickle_in)
    model = model.cuda()

    # play on
    while not game_over(board) and not check_for_error(board):
        if commentary: print("lets go player ", player)

        # roll dice
        dice = roll_dice()
        if commentary: print("rolled dices:", dice)

        # make a move (2 moves if the same number appears on the dice)
        for i in range(1 + int(dice[0] == dice[1])):
            board_copy = np.copy(board)

            if opponent == "agent":
                # Self-play: player -1 acts on a flipped board so the same
                # network plays both sides as "1".
                if player == 1:
                    move, y_old = agent.action(board_copy, dice, player, i,
                                               y_old, model, firstMove, True)
                    if len(move) != 0:
                        for m in move:
                            board = update_board(board, m, player)
                    if firstMove:
                        firstMove = False
                elif player == -1:
                    flipped_board = flipped_agent.flip_board(board_copy)
                    move, y_old_p2 = agent.action(flipped_board, dice, 1, i,
                                                  y_old_p2, model,
                                                  firstMove_p2, True)
                    if len(move) != 0:
                        for m in move:
                            flipped_board = update_board(flipped_board, m, 1)
                    board = flipped_agent.flip_board(flipped_board)
                    if firstMove_p2:
                        firstMove_p2 = False
            elif opponent == "human":
                pretty_print(board)
                if player == 1:
                    print("Computer's turn")
                    move, y_old = agent.action(board_copy, dice, player, i,
                                               y_old, model, firstMove, False)
                    print("Computer's move", move)
                elif player == -1:
                    print("Human's turn")
                    possible_moves, possible_boards = legal_moves(
                        board_copy, dice, player)
                    print("dice:", dice)
                    printing.moves_to_string(possible_moves)
                    text = input("prompt")
                    move = possible_moves[int(text)]

                if len(move) != 0:
                    for m in move:
                        board = update_board(board, m, player)
            elif opponent == "random":
                if player == 1:
                    move, y_old = agent.action(board_copy, dice, player, i,
                                               y_old, model, firstMove, False)
                elif player == -1:
                    move = random_agent(board_copy, dice, player, i)
                if len(move) != 0:
                    for m in move:
                        board = update_board(board, m, player)

            # give status after every move:
            if commentary:
                print("move from player", player, ":")
                pretty_print(board)

        # players take turns
        player = -player

    winner = -1 * player
    if opponent == "agent":
        # Reinforce the winner's trajectory and penalize the loser's.
        if winner == 1:
            agent.learn(y_old, model, board_copy, "yes")
            agent.learn(y_old_p2, model, board_copy, "no")
        else:
            agent.learn(y_old, model, board_copy, "no")
            agent.learn(y_old_p2, model, board_copy, "yes")

    # Persist the (possibly updated) model; the context manager replaces the
    # manual open/close of the original.
    with open("randommodel.pickle", "wb") as pickle_out:
        pickle.dump(model, pickle_out)
    return winner