# Example No. 1
def worker(policy, sb, wl, seed, total_times, lm, model_path, da, bs, tc, ex):
    """Run a single training session of `learn` on a fresh TicTacToeX environment.

    Thin wrapper intended for use as a subprocess/thread target: it builds the
    game environment from the board parameters and forwards the remaining
    arguments straight through to `learn`.

    Args:
        policy: policy network constructor passed to `learn`.
        sb: board side length; `nsteps` is derived as sb * sb.
        wl: winning line length for the game spec.
        seed: RNG seed for the learner.
        total_times: total training timesteps.
        lm: whether to load an existing model.
        model_path: path used for model loading/saving.
        da: enable data augmentation.
        bs: training batch size.
        tc: temperature constant.
        ex: expert flag, forwarded both to the game spec and to `learn`.
    """
    game_env = TicTacToeXGameSpec(sb, wl, ex)
    train_kwargs = dict(
        nsteps=sb * sb,  # one step per board cell
        nstack=1,
        seed=seed,
        total_timesteps=total_times,
        load_model=lm,
        model_path=model_path,
        data_augmentation=da,
        BATCH_SIZE=bs,
        TEMP_CTE=tc,
        expert=ex,
    )
    learn(policy, game_env, **train_kwargs)
        policy_fn = CnnPolicy_slim_scope5x5_3_1x1

    size_board = int(sys.argv[2])
    winning_length = int(sys.argv[3])
    TEMP_CTE = int(sys.argv[4])
    BATCH_SIZE = int(sys.argv[5])
    seed = int(sys.argv[6])
    expert = int(sys.argv[7])
    if expert == 1:
        expert = True
    elif expert == 2:
        expert = False
    model_path = ''
    print('________________________policy', policy_fn, 'size_board',
          size_board, 'winning_length', winning_length, 'batch size',
          BATCH_SIZE, 'TEMP_CTE', TEMP_CTE, 'seed', seed, 'expert', expert)
    env = TicTacToeXGameSpec(size_board, winning_length)
    learn(policy_fn,
          env,
          nsteps=size_board * size_board,
          nstack=1,
          seed=seed,
          total_timesteps=10000000,
          load_model=False,
          model_path=model_path,
          data_augmentation=True,
          BATCH_SIZE=BATCH_SIZE,
          TEMP_CTE=TEMP_CTE,
          expert=expert)
    env.close()