Example #1
def main():
    opt = get_args()

    assert opt.environment in environment.ENV_DICT.keys(), \
        "Unsupported environment: {} \nSupported environemts: {}".format(opt.environment, environment.ENV_DICT.keys())

    if opt.tpu:
        device = tpu.get_TPU()
    else:
        device = opt.device

    mp = _mp.get_context("spawn")

    ENV = environment.ENV_DICT[opt.environment]

    env = ENV.make_env(lstm=opt.lstm)
    state_shape = env.observation_space.shape
    n_actions = env.action_space.n

    shared_agent = A3C(n_actions=n_actions, lstm=opt.lstm).to(device)

    shared_agent.share_memory()

    optim = SharedAdam(shared_agent.parameters(), lr=opt.lr)
    optim.share_memory()

    processes = []

    for rank in range(0, opt.num_processes):
        p = mp.Process(target=train, args=(ENV.make_env, shared_agent, optim, device, opt, rank))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
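Every example constructs a SharedAdam optimizer and then calls share_memory()
on it so that all worker processes update a single set of Adam statistics.
None of the snippets include the class itself; the following is a minimal
sketch of such an optimizer, modeled on common A3C repositories. It is an
assumption: the SharedAdam imported by each example may differ in detail.

import math

import torch


class SharedAdam(torch.optim.Adam):
    """Adam whose per-parameter state lives in shared memory (sketch)."""

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0):
        super().__init__(params, lr=lr, betas=betas, eps=eps,
                         weight_decay=weight_decay)
        # Eagerly create the per-parameter state so it can be moved into
        # shared memory before any worker calls step().
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)

    def share_memory(self):
        # The parameters themselves are shared via model.share_memory();
        # this shares the optimizer's moment estimates and step counters.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()

    def step(self, closure=None):
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if group['weight_decay'] != 0:
                    grad = grad.add(p.data, alpha=group['weight_decay'])
                state = self.state[p]
                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']
                state['step'] += 1
                # Standard Adam moment updates with bias correction.
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                bias_corr1 = 1 - beta1 ** state['step'].item()
                bias_corr2 = 1 - beta2 ** state['step'].item()
                step_size = group['lr'] * math.sqrt(bias_corr2) / bias_corr1
                denom = exp_avg_sq.sqrt().add_(group['eps'])
                p.data.addcdiv_(exp_avg, denom, value=-step_size)
        return loss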
Example #2
def main():
  rnd_seed = None
  if rnd_seed:
    torch.manual_seed(rnd_seed)
    np.random.seed(rnd_seed)


  # ---------------------------------------
  #           DATA LOADING
  # ---------------------------------------
  #result_path = "../result_lrn_0p001_rl/"

  dict_file = "../dataset/CCGbank/dict_word"
  entity_file = "../dataset/CCGbank/dict_tag"
  index2word = get_index2word(dict_file)
  index2label = get_index2label(entity_file)
  vocab_size = len(index2word)
  label_size = len(index2label)

  #train_X, train_Y = minibatch_of_one_de('train')
  val_X, val_Y = minibatch_of_one_de('val')
  test_X, test_Y = minibatch_of_one_de('test')

  # ---------------------------------------
  #           HYPER PARAMETERS
  # ---------------------------------------
  # Using word2vec pre-trained embedding
  word_embedding_dim = 300

  hidden_dim = 512
  label_embedding_dim = 512
  max_epoch = 30
  # 0.001 is a good value
  ner_learning_rate = 0.001

  pretrained = None

  # ---------------------------------------
  #           GPU OR NOT?
  # ---------------------------------------
  gpu = True
  if gpu and rnd_seed:
    torch.cuda.manual_seed(rnd_seed)

  # ---------------------------------------
  #        MODEL INSTANTIATION
  # ---------------------------------------
  #attention = None
  attention = "fixed"

  load_model_dir = "../result_ccg_lrn_0p001_atten/"
  load_model_filename = os.path.join(load_model_dir, "ckpt_11.pth")

  batch_size = 1
  machine = ner(word_embedding_dim, hidden_dim, label_embedding_dim, vocab_size,
                label_size, learning_rate=ner_learning_rate,
                minibatch_size=batch_size, max_epoch=max_epoch, train_X=None,
                train_Y=None, val_X=val_X, val_Y=val_Y, test_X=test_X,
                test_Y=test_Y, attention=attention, gpu=gpu,
                pretrained=pretrained, load_model_filename=load_model_filename)
  if gpu:
    machine = machine.cuda()

  initial_beam_size = 1
  # Regardless of the initial beam size, it does not make sense for
  # max_beam_size to exceed the size of your label vocabulary
  max_beam_size = 10

  # ============   INIT RL =====================
  os.environ['OMP_NUM_THREADS'] = '4'
  #os.environ['CUDA_VISIBLE_DEVICES'] = ""


  parser = argparse.ArgumentParser(description='A3C')

  parser.add_argument('--logdir', default='../result_ccg_atten_ckpt_11_rl_lrn_0p001_reward_0p02_beam_1_gpu',
                      help='name of logging directory')
  parser.add_argument('--lr', type=float, default=0.001,
                      help='learning rate (default: 0.001)')
  parser.add_argument('--gamma', type=float, default=0.99,
                      help='discount factor for rewards (default: 0.99)')
  parser.add_argument('--n_epochs', type=int, default=100,
                      help='number of epochs for training agent (default: 100)')
  parser.add_argument('--entropy-coef', type=float, default=0.01,
                      help='entropy term coefficient (default: 0.01)')
  parser.add_argument('--num-processes', type=int, default=1,
                      help='how many training processes to use (default: 1)')
  parser.add_argument('--num-steps', type=int, default=20,
                      help='number of forward steps in A3C (default: 20)')

  parser.add_argument('--tau', type=float, default=1.00,
                      help='parameter for GAE (default: 1.00)')
  parser.add_argument('--value-loss-coef', type=float, default=0.5,
                      help='value loss coefficient (default: 0.5)')
  parser.add_argument('--max-grad-norm', type=float, default=5,
                      help='maximum gradient norm (default: 5)')
  parser.add_argument('--seed', type=int, default=1,
                      help='random seed (default: 1)')
  parser.add_argument('--max-episode-length', type=int, default=1000000,
                      help='maximum length of an episode (default: 1000000)')
  parser.add_argument('--name', default='train',
                      help='name of the process')
  parser.add_argument('--no-shared', action='store_true', default=False,
                      help='use an optimizer without shared momentum.')
  args = parser.parse_args()

  if not os.path.exists(args.logdir):
    os.mkdir(args.logdir)

  shared_model = AdaptiveActorCritic(max_beam_size=max_beam_size,
                                     action_space=3)
  shared_model.share_memory()

  if args.no_shared:
    shared_optimizer = None
  else:
    # default path: args.no_shared is False
    shared_optimizer = SharedAdam(params=shared_model.parameters(),
                                  lr=args.lr)
    # optimizer = optim.Adam(shared_model.parameters(), lr=learning_rate)
    shared_optimizer.share_memory()

  # --------------------------------------------
  #                 RL TRAINING
  # --------------------------------------------
  # For German dataset, f_score_index_begin = 5 (because O_INDEX = 4)
  # For toy dataset, f_score_index_begin = 4 (because {0: '<s>', 1: '<e>', 2: '<p>', 3: '<u>', ...})
  # For CCG dataset, f_score_index_begin = 2 (because {0: _PAD, 1: _SOS, ...})
  f_score_index_begin = 2
  # RL reward coefficient
  reward_coef_fscore = 1
  reward_coef_beam_size = 0.02

  train_adaptive(0,
                 machine,
                 max_beam_size,
                 shared_model,
                 shared_optimizer,
                 val_X, val_Y, index2word, index2label,
                 "val", "adaptive", initial_beam_size,
                 reward_coef_fscore, reward_coef_beam_size,
                 f_score_index_begin,
                 args)
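Examples #2 and #6 weight an F-score term against a beam-size penalty through
reward_coef_fscore and reward_coef_beam_size. The reward computed inside
train_adaptive is not shown in these snippets; a plausible shape for it, as a
purely illustrative assumption, is:

def adaptive_beam_reward(f_score, beam_size,
                         coef_fscore=1.0, coef_beam=0.02):
    # Reward tagging accuracy (F-score) while penalizing the compute cost
    # of keeping a wide beam. Illustrative only; the actual reward used by
    # train_adaptive is not shown in the source.
    return coef_fscore * f_score - coef_beam * beam_size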
Example #3
    kill = mp.Event()
    counter = mp.Value('i', 0)
    steps = mp.Value('i', 0)
    lock = mp.Lock()

    torch.set_num_threads(1)
    torch.manual_seed(args.seed)
    env = create_vizdoom_env(args.config_path, args.train_scenario_path)
    shared_model = ActorCritic(env.observation_space.spaces[0].shape[0],
                               env.action_space, args.topology)
    shared_model.share_memory()

    if args.no_shared:
        optimizer = Adam(shared_model.parameters(), lr=args.lr)
    else:
        optimizer = SharedAdam(shared_model.parameters(), lr=args.lr)
        optimizer.share_memory()

    if args.checkpoint_path and os.path.isfile(args.checkpoint_path):
        checkpoint = torch.load(args.checkpoint_path)
        counter.value = checkpoint['episodes']
        shared_model.load_state_dict(checkpoint['model'])
        shared_model.share_memory()
        optimizer.load_state_dict(checkpoint['optimizer'])
        optimizer.share_memory()
    else:
        checkpoint = {}

    processes = []

    logging = build_logger(
Example #4
def main(args):
    mp.set_start_method('spawn')  # required to avoid a Conv2d freeze issue
    summary_queue = mp.Queue()
    game_intf = GameInterfaceHandler(args.mode)
    # critic
    shared_model = FullyConv(game_intf.minimap_channels,
                             game_intf.screen_channels,
                             game_intf.screen_resolution, game_intf.num_action,
                             args.lstm)

    # load or reset model file and logs
    counter_f_path = os.path.join(args.log_dir, args.mode, args.map_name,
                                  args.job_name, "counter.log")
    init_episode_counter_val = 0
    if not args.reset:
        try:
            model_f_path = os.path.join(args.model_dir, args.mode,
                                        args.map_name, args.job_name,
                                        "model.dat")
            shared_model.load_state_dict(torch.load(model_f_path))
            with open(counter_f_path, 'r') as counter_f:
                init_episode_counter_val = int(counter_f.readline())
            summary_queue.put(
                Summary(
                    action='add_text',
                    tag='log',
                    value1='Reuse trained model {0}, from global_counter: {1}'.
                    format(model_f_path, init_episode_counter_val)))
        except FileNotFoundError as e:
            summary_queue.put(
                Summary(
                    action='add_text',
                    tag='log',
                    value1='No model found -- Start from scratch, {0}'.format(
                        str(e))))
    else:
        summary_queue.put(
            Summary(action='add_text',
                    tag='log',
                    value1='Reset -- Start from scratch'))
    with open(counter_f_path, 'w+') as counter_f:
        counter_f.write(str(init_episode_counter_val))
    summary_queue.put(
        Summary(action='add_text',
                tag='log',
                value1='Main process PID: {0}'.format(os.getpid())))
    shared_model.share_memory()

    optimizer = SharedAdam(shared_model.parameters(), lr=args.lr)
    optimizer.share_memory()

    # multiprocesses, Hogwild! style update
    processes = []

    global_episode_counter = mp.Value('i', init_episode_counter_val)

    # each worker_thread creates its own environment and trains agents
    for rank in range(args.num_processes):
        # only write summaries in one of the workers, since they are identical
        worker_summary_queue = summary_queue if rank == 0 else None
        worker_thread = mp.Process(target=worker_fn,
                                   args=(rank, args, shared_model,
                                         global_episode_counter,
                                         worker_summary_queue, optimizer))
        worker_thread.daemon = True
        worker_thread.start()
        processes.append(worker_thread)
        time.sleep(2)

    # start a thread for policy evaluation
    monitor_thread = mp.Process(target=monitor_fn,
                                args=(args.num_processes, args, shared_model,
                                      global_episode_counter, summary_queue))
    monitor_thread.daemon = True
    monitor_thread.start()
    processes.append(monitor_thread)

    # summary writer thread
    summary_thread = mp.Process(target=writer_fn,
                                args=(args, summary_queue,
                                      init_episode_counter_val))
    summary_thread.daemon = True
    summary_thread.start()
    processes.append(summary_thread)

    # wait for all processes to finish
    try:
        killed_process_count = 0
        for process in processes:
            process.join()
            killed_process_count += 1 if process.exitcode == 1 else 0
            if killed_process_count >= args.num_processes:
                # exit if only monitor and writer alive
                raise SystemExit
    except (KeyboardInterrupt, SystemExit):
        for process in processes:
            # without killing child process, process.terminate() will cause orphans
            # ref: https://thebearsenal.blogspot.com/2018/01/creation-of-orphan-process-in-linux.html
            kill_child_processes(process.pid)
            process.terminate()
            process.join()
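The cleanup loop above relies on a kill_child_processes helper that is not
shown in the snippet. A minimal sketch using psutil could look like the
following; this is an assumption, and the original helper may be implemented
differently.

import psutil


def kill_child_processes(parent_pid):
    """Kill all descendants of parent_pid so terminate() leaves no orphans.

    Hypothetical sketch; the helper referenced above is not in the source.
    """
    try:
        parent = psutil.Process(parent_pid)
    except psutil.NoSuchProcess:
        return
    for child in parent.children(recursive=True):
        try:
            child.kill()
        except psutil.NoSuchProcess:
            pass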
Example #5
    torch.manual_seed(args.seed)
    env = StackEnv(env, args.frame_num)

    # create the log directory
    if not os.path.exists(args.log_dir):
        os.mkdir(args.log_dir)

    global_brain = Policy(
        env.action_space.n,
        dim_obs=env.observation_space.shape[0],
        out_dim=args.out_dim,
        frame_num=args.frame_num)  #.to(device)  # define the global brain
    global_brain.share_memory()
    #optimizer = SharedRMSprop(global_brain.parameters(),lr=args.lr)
    optimizer = SharedAdam(global_brain.parameters(),
                           lr=args.lr,
                           betas=(0.92, 0.999))
    #optimizer.share_memory()
    global_ep = mp.Value('i', 0)
    global_ep_r = mp.Value('d', 0.)
    res_queue = mp.Queue()

    wt = Environment(args, global_ep, global_ep_r, res_queue, global_brain,
                     optimizer)

    global_t = torch.LongTensor(1).share_memory_()
    global_t.zero_()
    processes = []

    pipe_reward = []
    for rank in range(args.num_process):
Example #6
def main():
    rnd_seed = None
    if rnd_seed:
        torch.manual_seed(rnd_seed)
        np.random.seed(rnd_seed)

    # ---------------------------------------
    #           DATA LOADING
    # ---------------------------------------
    result_path = "./result/"
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    dict_file = "../../dataset/German/vocab1.de"
    entity_file = "../../dataset/German/vocab1.en"
    index2word = get_index2word(dict_file)
    index2label = get_index2label(entity_file)
    vocab_size = len(index2word)
    label_size = len(index2label)

    train_X, train_Y = minibatch_of_one_de('train')
    val_X, val_Y = minibatch_of_one_de('valid')
    test_X, test_Y = minibatch_of_one_de('test')

    # ---------------------------------------
    #           HYPER PARAMETERS
    # ---------------------------------------
    # Using word2vec pre-trained embedding
    word_embedding_dim = 300
    hidden_dim = 64
    label_embedding_dim = 8
    max_epoch = 100
    # 0.001 is a good value
    learning_rate = 0.001

    pretrained = 'de64'

    if pretrained == 'de64':
        word_embedding_dim = 64

    # ---------------------------------------
    #           GPU OR NOT?
    # ---------------------------------------
    gpu = False
    if gpu and rnd_seed:
        torch.cuda.manual_seed(rnd_seed)

    # ---------------------------------------
    #        MODEL INSTANTIATION
    # ---------------------------------------
    attention = "fixed"
    attn_string = '_attention' if attention else ''
    load_model_filename = os.path.join(result_path,
                                       "ckpt" + attn_string + ".pth")

    batch_size = 1
    machine = ner(word_embedding_dim,
                  hidden_dim,
                  label_embedding_dim,
                  vocab_size,
                  label_size,
                  learning_rate=learning_rate,
                  minibatch_size=batch_size,
                  max_epoch=max_epoch,
                  train_X=None,
                  train_Y=None,
                  val_X=val_X,
                  val_Y=val_Y,
                  test_X=test_X,
                  test_Y=test_Y,
                  attention=attention,
                  gpu=gpu,
                  pretrained=pretrained,
                  load_model_filename=load_model_filename,
                  load_map_location="cpu")
    if gpu:
        machine = machine.cuda()

    initial_beam_size = 10
    # Regardless of the initial beam size, it does not make sense for
    # max_beam_size to exceed the size of your label vocabulary
    max_beam_size = label_size

    # ============   INIT RL =====================
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['CUDA_VISIBLE_DEVICES'] = ""

    parser = argparse.ArgumentParser(description='A3C')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0001,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.99,
                        help='discount factor for rewards (default: 0.99)')
    parser.add_argument('--tau',
                        type=float,
                        default=1.00,
                        help='parameter for GAE (default: 1.00)')
    parser.add_argument('--entropy-coef',
                        type=float,
                        default=0.01,
                        help='entropy term coefficient (default: 0.01)')
    parser.add_argument('--value-loss-coef',
                        type=float,
                        default=0.5,
                        help='value loss coefficient (default: 0.5)')
    parser.add_argument('--max-grad-norm',
                        type=float,
                        default=5,
                        help='maximum gradient norm (default: 5)')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument(
        '--n_epochs',
        type=int,
        default=30,
        help='number of epochs for training agent (default: 30)')
    parser.add_argument('--num-processes',
                        type=int,
                        default=4,
                        help='how many training processes to use (default: 4)')
    parser.add_argument('--num-steps',
                        type=int,
                        default=20,
                        help='number of forward steps in A3C (default: 20)')
    parser.add_argument('--max-episode-length',
                        type=int,
                        default=1000000,
                        help='maximum length of an episode (default: 1000000)')
    parser.add_argument('--name', default='train', help='name of the process')
    parser.add_argument('--logdir',
                        default='log',
                        help='name of logging directory')
    parser.add_argument('--no-shared',
                        action='store_true',
                        default=False,
                        help='use an optimizer without shared momentum.')
    args = parser.parse_args()

    if not os.path.exists(args.logdir):
        os.mkdir(args.logdir)

    shared_model = AdaptiveActorCritic(max_beam_size=max_beam_size,
                                       action_space=3)
    shared_model.share_memory()

    if args.no_shared:
        optimizer = None
    else:
        optimizer = SharedAdam(params=shared_model.parameters(),
                               lr=learning_rate)
        # optimizer = optim.Adam(shared_model.parameters(), lr=learning_rate)
        optimizer.share_memory()

    # --------------------------------------------
    #                 RL TRAINING
    # --------------------------------------------
    # For German dataset, f_score_index_begin = 5 (because O_INDEX = 4)
    # For toy dataset, f_score_index_begin = 4 (because {0: '<s>', 1: '<e>', 2: '<p>', 3: '<u>', ...})
    f_score_index_begin = 5
    # RL reward coefficient
    reward_coef_fscore = 1
    reward_coef_beam_size = 0.1

    processes = []
    counter = mp.Value('i', 0)
    lock = mp.Lock()

    # eval along with many processes of training RL
    args.name = "val"
    p_val = mp.Process(target=test_adaptive,
                       args=(args.num_processes, machine, max_beam_size,
                             learning_rate, shared_model, counter, val_X,
                             val_Y, index2word, index2label, "val", "log_",
                             "adaptive", initial_beam_size, reward_coef_fscore,
                             reward_coef_beam_size, f_score_index_begin, args))

    p_val.start()
    processes.append(p_val)

    args.name = "test"
    p_test = mp.Process(target=test_adaptive,
                        args=(args.num_processes + 1, machine, max_beam_size,
                              learning_rate, shared_model, counter, test_X,
                              test_Y, index2word, index2label, "test", "log_",
                              "adaptive", initial_beam_size,
                              reward_coef_fscore, reward_coef_beam_size,
                              f_score_index_begin, args))

    p_test.start()
    processes.append(p_test)

    args.name = "train"
    for rank in range(0, args.num_processes):
        p = mp.Process(target=train_adaptive,
                       args=(rank, machine, max_beam_size, learning_rate,
                             shared_model, counter, lock, optimizer, train_X,
                             train_Y, index2word, index2label, "train", "log_",
                             "adaptive", initial_beam_size, reward_coef_fscore,
                             reward_coef_beam_size, f_score_index_begin, args))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

    # =====================================
    print("TESTING w SHARED MODEL")
    processes = []
    counter = mp.Value('i', 0)

    # test for only 1 epoch
    args.n_epochs = 1
    args.name = "final_test"
    p = mp.Process(target=test_adaptive,
                   args=(args.num_processes + 2, machine, max_beam_size,
                         learning_rate, shared_model, counter, test_X, test_Y,
                         index2word, index2label, "test", args.name,
                         "adaptive", initial_beam_size, reward_coef_fscore,
                         reward_coef_beam_size, f_score_index_begin, args))
    p.start()
    processes.append(p)

    for p in processes:
        p.join()
Example #7
def main():
    """
    Train an A3C agent
    """
    os.environ['OMP_NUM_THREADS'] = '1'
    # Command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--max_timesteps',
        default=5000000,
        type=int,
        help="How many total timesteps to run between all environments")
    parser.add_argument(
        '--batch_size',
        default=20,
        type=int,
        help="How many steps to do before reflecting on the batch")
    parser.add_argument('--env_name',
                        default='PongNoFrameskip-v4',
                        type=str,
                        help="Which environment to train on")
    parser.add_argument(
        '--discount_factor',
        default=0.99,
        type=float,
        help=("The disount factor, also called gamma, used for discounting "
              "future returns"))
    parser.add_argument('--gae',
                        default=1.,
                        type=float,
                        help="Parameter for use in GAE, also called tau")
    parser.add_argument('--actor_coef',
                        default=1.,
                        type=float,
                        help="How much weight to give the actor when updating")
    parser.add_argument(
        '--critic_coef',
        default=0.5,
        type=float,
        help="How much weight to give the critic when updating")
    parser.add_argument('--entropy_coef',
                        default=0.01,
                        type=float,
                        help="How much weight to give entropy when updating")
    parser.add_argument('--learning_rate',
                        default=0.0001,
                        type=float,
                        help="Optimizer learning rate")
    parser.add_argument('--no_of_workers',
                        default=16,
                        type=int,
                        help="Number of parallel processes to run")
    parser.add_argument(
        '--feature_type',
        default='cnn',
        type=str,
        help="""The feature extractor to use on the network input.
        Options are: cnn, mlp""")
    args = parser.parse_args()
    print(f"Args: {args}")

    hyperparams = HyperParams(max_timesteps=args.max_timesteps,
                              batch_size=args.batch_size,
                              discount_factor=args.discount_factor,
                              gae=args.gae,
                              actor_coef=args.actor_coef,
                              critic_coef=args.critic_coef,
                              entropy_coef=args.entropy_coef,
                              env_name=args.env_name,
                              learning_rate=args.learning_rate,
                              no_of_workers=args.no_of_workers,
                              feature_type=args.feature_type)

    # Make temporary directory for logging
    directory = './runs/{}'.format(
        datetime.datetime.now().strftime("%Y%m%d-%H%M"))
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Shared model
    atari = hyperparams.feature_type == 'cnn'
    temp_env = create_environment(args.env_name, monitor=False, atari=atari)
    shared_model = ActorCritic(temp_env.observation_space.shape,
                               temp_env.action_space.n,
                               hyperparams.feature_type)
    shared_model.share_memory()

    # Frame counter
    frame_counter = Value('i')

    # Optimizer
    optimizer = SharedAdam(shared_model.parameters(),
                           lr=hyperparams.learning_rate)
    optimizer.share_memory()

    # Monitor
    monitor = Monitor(directory, hyperparams)

    processes = []
    monitor_process = Process(target=monitor.monitor,
                              args=(frame_counter, hyperparams.max_timesteps))
    monitor_process.start()
    processes.append(monitor_process)
    for i in range(hyperparams.no_of_workers):
        process = Process(target=train,
                          args=(shared_model, directory, hyperparams,
                                frame_counter, optimizer, monitor.queue, i))
        process.start()
        processes.append(process)

    # train(
    #     shared_model=shared_model,
    #     directory=directory,
    #     hyperparams=hyperparams,
    #     frame_counter=frame_counter,
    #     optimizer=optimizer,
    #     monitor_queue=monitor.queue,
    #     process_number=0
    # )

    for process in processes:
        process.join()
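All of these launchers assume worker processes that pull the shared weights,
compute gradients on local rollouts, and write those gradients back into the
shared model before the shared optimizer steps. A minimal sketch of that
worker-side pattern follows; ensure_shared_grads, compute_loss, and batch are
illustrative assumptions, not the train functions imported by the examples.

import torch


def ensure_shared_grads(local_model, shared_model):
    # Alias the shared parameters' .grad to the local gradients so that
    # optimizer.step() on the shared parameters applies this worker's update.
    for param, shared_param in zip(local_model.parameters(),
                                   shared_model.parameters()):
        if shared_param.grad is not None:
            return  # already aliased on an earlier iteration
        shared_param._grad = param.grad


def worker_update(local_model, shared_model, optimizer, compute_loss, batch):
    # Hogwild-style update: no locking, so updates from different workers
    # may interleave; in practice this still works well for A3C.
    local_model.load_state_dict(shared_model.state_dict())  # sync in
    loss = compute_loss(local_model, batch)  # local forward pass
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(local_model.parameters(), 5.0)
    ensure_shared_grads(local_model, shared_model)  # push gradients out
    optimizer.step()  # optimizer was built over shared_model.parameters()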