def main():
    opt = get_args()
    assert opt.environment in environment.ENV_DICT.keys(), \
        "Unsupported environment: {}\nSupported environments: {}".format(
            opt.environment, environment.ENV_DICT.keys())

    if opt.tpu:
        device = tpu.get_TPU()
    else:
        device = opt.device

    mp = _mp.get_context("spawn")

    ENV = environment.ENV_DICT[opt.environment]
    env = ENV.make_env(lstm=opt.lstm)
    state_shape = env.observation_space.shape
    n_actions = env.action_space.n

    shared_agent = A3C(n_actions=n_actions, lstm=opt.lstm).to(device)
    shared_agent.share_memory()

    optim = SharedAdam(shared_agent.parameters(), lr=opt.lr)
    optim.share_memory()

    processes = []
    for rank in range(0, opt.num_processes):
        p = mp.Process(target=train,
                       args=(ENV.make_env, shared_agent, optim, device, opt, rank))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
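# SharedAdam is used by these entry points but not shown. Below is a minimal
# sketch of the usual shared-memory Adam variant (an assumption modeled on the
# common pytorch-a3c recipe; the project's own class may differ in detail).
# Pre-creating the state tensors and moving them into shared memory lets
# Hogwild!-style workers accumulate into a single optimizer state.

import math
import torch

class SharedAdam(torch.optim.Adam):
    """Adam whose state tensors live in shared memory."""

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0):
        super().__init__(params, lr=lr, betas=betas, eps=eps,
                         weight_decay=weight_decay)
        # Pre-create state so it exists before share_memory() is called.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)

    def share_memory(self):
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()

    def step(self, closure=None):
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]
                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']
                state['step'] += 1
                if group['weight_decay'] != 0:
                    grad = grad.add(p.data, alpha=group['weight_decay'])
                # Update biased first and second moment estimates.
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                denom = exp_avg_sq.sqrt().add_(group['eps'])
                bias_correction1 = 1 - beta1 ** state['step'].item()
                bias_correction2 = 1 - beta2 ** state['step'].item()
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
                p.data.addcdiv_(exp_avg, denom, value=-step_size)
        return loss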
def main():
    rnd_seed = None
    if rnd_seed:
        torch.manual_seed(rnd_seed)
        np.random.seed(rnd_seed)

    # ---------------------------------------
    # DATA LOADING
    # ---------------------------------------
    #result_path = "../result_lrn_0p001_rl/"
    dict_file = "../dataset/CCGbank/dict_word"
    entity_file = "../dataset/CCGbank/dict_tag"
    index2word = get_index2word(dict_file)
    index2label = get_index2label(entity_file)
    vocab_size = len(index2word)
    label_size = len(index2label)

    #train_X, train_Y = minibatch_of_one_de('train')
    val_X, val_Y = minibatch_of_one_de('val')
    test_X, test_Y = minibatch_of_one_de('test')

    # ---------------------------------------
    # HYPER PARAMETERS
    # ---------------------------------------
    # Using word2vec pre-trained embedding
    word_embedding_dim = 300
    hidden_dim = 512
    label_embedding_dim = 512
    max_epoch = 30
    # 0.001 is a good value
    ner_learning_rate = 0.001
    pretrained = None

    # ---------------------------------------
    # GPU OR NOT?
    # ---------------------------------------
    gpu = True
    if gpu and rnd_seed:
        torch.cuda.manual_seed(rnd_seed)

    # ---------------------------------------
    # MODEL INSTANTIATION
    # ---------------------------------------
    #attention = None
    attention = "fixed"
    load_model_dir = "../result_ccg_lrn_0p001_atten/"
    load_model_filename = os.path.join(load_model_dir, "ckpt_11.pth")

    batch_size = 1
    machine = ner(word_embedding_dim, hidden_dim, label_embedding_dim,
                  vocab_size, label_size,
                  learning_rate=ner_learning_rate, minibatch_size=batch_size,
                  max_epoch=max_epoch,
                  train_X=None, train_Y=None,
                  val_X=val_X, val_Y=val_Y,
                  test_X=test_X, test_Y=test_Y,
                  attention=attention,
                  gpu=gpu, pretrained=pretrained,
                  load_model_filename=load_model_filename)
    if gpu:
        machine = machine.cuda()

    initial_beam_size = 1
    # When you have only one beam, it does not make sense to consider
    # max_beam_size larger than the size of your label vocabulary
    max_beam_size = 10

    # ============ INIT RL =====================
    os.environ['OMP_NUM_THREADS'] = '4'
    #os.environ['CUDA_VISIBLE_DEVICES'] = ""

    parser = argparse.ArgumentParser(description='A3C')
    parser.add_argument('--logdir',
                        default='../result_ccg_atten_ckpt_11_rl_lrn_0p001_reward_0p02_beam_1_gpu',
                        help='name of logging directory')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--gamma', type=float, default=0.99,
                        help='discount factor for rewards (default: 0.99)')
    parser.add_argument('--n_epochs', type=int, default=100,
                        help='number of epochs for training agent (default: 100)')
    parser.add_argument('--entropy-coef', type=float, default=0.01,
                        help='entropy term coefficient (default: 0.01)')
    parser.add_argument('--num-processes', type=int, default=1,
                        help='how many training processes to use (default: 1)')
    parser.add_argument('--num-steps', type=int, default=20,
                        help='number of forward steps in A3C (default: 20)')
    parser.add_argument('--tau', type=float, default=1.00,
                        help='parameter for GAE (default: 1.00)')
    parser.add_argument('--value-loss-coef', type=float, default=0.5,
                        help='value loss coefficient (default: 0.5)')
    parser.add_argument('--max-grad-norm', type=float, default=5,
                        help='max gradient norm (default: 5)')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--max-episode-length', type=int, default=1000000,
                        help='maximum length of an episode (default: 1000000)')
    parser.add_argument('--name', default='train',
                        help='name of the process')
    parser.add_argument('--no-shared', default=False,
                        help='use an optimizer without shared momentum.')
    args = parser.parse_args()

    if not os.path.exists(args.logdir):
        os.mkdir(args.logdir)

    shared_model = AdaptiveActorCritic(max_beam_size=max_beam_size,
                                       action_space=3)
    shared_model.share_memory()

    if args.no_shared:
        shared_optimizer = None
    else:
        # --no-shared defaults to False, so this shared optimizer is the default path
        shared_optimizer = SharedAdam(params=shared_model.parameters(),
                                      lr=args.lr)
        # optimizer = optim.Adam(shared_model.parameters(), lr=learning_rate)
        shared_optimizer.share_memory()

    # --------------------------------------------
    # RL TRAINING
    # --------------------------------------------
    # For German dataset, f_score_index_begin = 5 (because O_INDEX = 4)
    # For toy dataset, f_score_index_begin = 4 (because {0: '<s>', 1: '<e>', 2: '<p>', 3: '<u>', ...})
    # For CCG dataset, f_score_index_begin = 2 (because {0: _PAD, 1: _SOS, ...})
    f_score_index_begin = 2

    # RL reward coefficient
    reward_coef_fscore = 1
    reward_coef_beam_size = 0.02

    train_adaptive(0, machine, max_beam_size,
                   shared_model, shared_optimizer,
                   val_X, val_Y, index2word, index2label,
                   "val", "adaptive",
                   initial_beam_size,
                   reward_coef_fscore, reward_coef_beam_size,
                   f_score_index_begin,
                   args)
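# For reference, a minimal sketch of how the two reward coefficients above
# could combine per decoding step. This is a hypothetical helper inferred from
# the coefficient names; the actual reward shaping lives inside train_adaptive.
# The idea: pay the agent for F-score improvement, charge it for beam width.

def step_reward(fscore_delta, beam_size,
                reward_coef_fscore=1.0, reward_coef_beam_size=0.02):
    """Reward = scaled F-score improvement minus a per-step beam-size cost."""
    return reward_coef_fscore * fscore_delta - reward_coef_beam_size * beam_size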
kill = mp.Event()
counter = mp.Value('i', 0)
steps = mp.Value('i', 0)
lock = mp.Lock()

torch.set_num_threads(1)
torch.manual_seed(args.seed)

env = create_vizdoom_env(args.config_path, args.train_scenario_path)

shared_model = ActorCritic(env.observation_space.spaces[0].shape[0],
                           env.action_space, args.topology)
shared_model.share_memory()

if args.no_shared:
    optimizer = Adam(shared_model.parameters(), lr=args.lr)
else:
    optimizer = SharedAdam(shared_model.parameters(), lr=args.lr)
    optimizer.share_memory()

if args.checkpoint_path and os.path.isfile(args.checkpoint_path):
    checkpoint = torch.load(args.checkpoint_path)
    counter.value = checkpoint['episodes']
    shared_model.load_state_dict(checkpoint['model'])
    shared_model.share_memory()
    optimizer.load_state_dict(checkpoint['optimizer'])
    optimizer.share_memory()
else:
    checkpoint = {}

processes = []

logging = build_logger(
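# The loader above expects a checkpoint dict with 'episodes', 'model', and
# 'optimizer' keys. A minimal sketch of the matching save side (a hypothetical
# helper; everything other than those three keys is an assumption):

import torch

def save_checkpoint(path, episodes, shared_model, optimizer):
    """Persist the episode counter plus model and optimizer state."""
    torch.save({
        'episodes': episodes,                    # resumes counter.value
        'model': shared_model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, path)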
def main(args):
    mp.set_start_method('spawn')  # required to avoid Conv2d freeze issue

    summary_queue = mp.Queue()

    game_intf = GameInterfaceHandler(args.mode)
    # critic
    shared_model = FullyConv(game_intf.minimap_channels,
                             game_intf.screen_channels,
                             game_intf.screen_resolution,
                             game_intf.num_action,
                             args.lstm)

    # load or reset model file and logs
    counter_f_path = os.path.join(args.log_dir, args.mode, args.map_name,
                                  args.job_name, "counter.log")
    init_episode_counter_val = 0
    if not args.reset:
        try:
            model_f_path = os.path.join(args.model_dir, args.mode,
                                        args.map_name, args.job_name,
                                        "model.dat")
            shared_model.load_state_dict(torch.load(model_f_path))
            with open(counter_f_path, 'r') as counter_f:
                init_episode_counter_val = int(counter_f.readline())
            summary_queue.put(
                Summary(action='add_text', tag='log',
                        value1='Reuse trained model {0}, from global_counter: {1}'.format(
                            model_f_path, init_episode_counter_val)))
        except FileNotFoundError as e:
            summary_queue.put(
                Summary(action='add_text', tag='log',
                        value1='No model found -- Start from scratch, {0}'.format(str(e))))
    else:
        summary_queue.put(
            Summary(action='add_text', tag='log',
                    value1='Reset -- Start from scratch'))
    with open(counter_f_path, 'w+') as counter_f:
        counter_f.write(str(init_episode_counter_val))
    summary_queue.put(
        Summary(action='add_text', tag='log',
                value1='Main process PID: {0}'.format(os.getpid())))

    shared_model.share_memory()
    optimizer = SharedAdam(shared_model.parameters(), lr=args.lr)
    optimizer.share_memory()

    # multiprocesses, Hogwild! style update
    processes = []
    global_episode_counter = mp.Value('i', init_episode_counter_val)

    # each worker_thread creates its own environment and trains agents
    for rank in range(args.num_processes):
        # only write summaries in one of the workers, since they are identical
        worker_summary_queue = summary_queue if rank == 0 else None
        worker_thread = mp.Process(
            target=worker_fn,
            args=(rank, args, shared_model, global_episode_counter,
                  worker_summary_queue, optimizer))
        worker_thread.daemon = True
        worker_thread.start()
        processes.append(worker_thread)
        time.sleep(2)

    # start a thread for policy evaluation
    monitor_thread = mp.Process(
        target=monitor_fn,
        args=(args.num_processes, args, shared_model,
              global_episode_counter, summary_queue))
    monitor_thread.daemon = True
    monitor_thread.start()
    processes.append(monitor_thread)

    # summary writer thread
    summary_thread = mp.Process(
        target=writer_fn,
        args=(args, summary_queue, init_episode_counter_val))
    summary_thread.daemon = True
    summary_thread.start()
    processes.append(summary_thread)

    # wait for all processes to finish
    try:
        killed_process_count = 0
        for process in processes:
            process.join()
            killed_process_count += 1 if process.exitcode == 1 else 0
            if killed_process_count >= args.num_processes:
                # exit if only monitor and writer are left alive
                raise SystemExit
    except (KeyboardInterrupt, SystemExit):
        for process in processes:
            # without killing child processes first, process.terminate() would leave orphans
            # ref: https://thebearsenal.blogspot.com/2018/01/creation-of-orphan-process-in-linux.html
            kill_child_processes(process.pid)
            process.terminate()
            process.join()
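# kill_child_processes is referenced above but not shown. A minimal sketch
# using psutil (an assumption -- the original may implement this differently,
# e.g. via os.kill on a scanned process table):

import psutil

def kill_child_processes(parent_pid):
    """Terminate all descendants of parent_pid so terminate() leaves no orphans."""
    try:
        parent = psutil.Process(parent_pid)
    except psutil.NoSuchProcess:
        return
    for child in parent.children(recursive=True):
        child.terminate()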
torch.manual_seed(args.seed)

env = StackEnv(env, args.frame_num)

# create the log directory
if not os.path.exists(args.log_dir):
    os.mkdir(args.log_dir)

# define the global brain
global_brain = Policy(env.action_space.n,
                      dim_obs=env.observation_space.shape[0],
                      out_dim=args.out_dim,
                      frame_num=args.frame_num)  #.to(device)
global_brain.share_memory()

#optimizer = SharedRMSprop(global_brain.parameters(), lr=args.lr)
optimizer = SharedAdam(global_brain.parameters(), lr=args.lr,
                       betas=(0.92, 0.999))
#optimizer.share_memory()

global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

wt = Environment(args, global_ep, global_ep_r, res_queue,
                 global_brain, optimizer)

global_t = torch.LongTensor(1).share_memory_()
global_t.zero_()

processes = []
pipe_reward = []
for rank in range(args.num_process):
def main():
    rnd_seed = None
    if rnd_seed:
        torch.manual_seed(rnd_seed)
        np.random.seed(rnd_seed)

    # ---------------------------------------
    # DATA LOADING
    # ---------------------------------------
    result_path = "./result/"
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    dict_file = "../../dataset/German/vocab1.de"
    entity_file = "../../dataset/German/vocab1.en"
    index2word = get_index2word(dict_file)
    index2label = get_index2label(entity_file)
    vocab_size = len(index2word)
    label_size = len(index2label)

    train_X, train_Y = minibatch_of_one_de('train')
    val_X, val_Y = minibatch_of_one_de('valid')
    test_X, test_Y = minibatch_of_one_de('test')

    # ---------------------------------------
    # HYPER PARAMETERS
    # ---------------------------------------
    # Using word2vec pre-trained embedding
    word_embedding_dim = 300
    hidden_dim = 64
    label_embedding_dim = 8
    max_epoch = 100
    # 0.001 is a good value
    learning_rate = 0.001
    pretrained = 'de64'
    if pretrained == 'de64':
        word_embedding_dim = 64

    # ---------------------------------------
    # GPU OR NOT?
    # ---------------------------------------
    gpu = False
    if gpu and rnd_seed:
        torch.cuda.manual_seed(rnd_seed)

    # ---------------------------------------
    # MODEL INSTANTIATION
    # ---------------------------------------
    attention = "fixed"
    attn_string = '_attention' if attention else ''
    load_model_filename = os.path.join(result_path, "ckpt" + attn_string + ".pth")

    batch_size = 1
    machine = ner(word_embedding_dim, hidden_dim, label_embedding_dim,
                  vocab_size, label_size,
                  learning_rate=learning_rate, minibatch_size=batch_size,
                  max_epoch=max_epoch,
                  train_X=None, train_Y=None,
                  val_X=val_X, val_Y=val_Y,
                  test_X=test_X, test_Y=test_Y,
                  attention=attention,
                  gpu=gpu, pretrained=pretrained,
                  load_model_filename=load_model_filename,
                  load_map_location="cpu")
    if gpu:
        machine = machine.cuda()

    initial_beam_size = 10
    # When you have only one beam, it does not make sense to consider
    # max_beam_size larger than the size of your label vocabulary
    max_beam_size = label_size

    # ============ INIT RL =====================
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['CUDA_VISIBLE_DEVICES'] = ""

    parser = argparse.ArgumentParser(description='A3C')
    parser.add_argument('--lr', type=float, default=0.0001,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--gamma', type=float, default=0.99,
                        help='discount factor for rewards (default: 0.99)')
    parser.add_argument('--tau', type=float, default=1.00,
                        help='parameter for GAE (default: 1.00)')
    parser.add_argument('--entropy-coef', type=float, default=0.01,
                        help='entropy term coefficient (default: 0.01)')
    parser.add_argument('--value-loss-coef', type=float, default=0.5,
                        help='value loss coefficient (default: 0.5)')
    parser.add_argument('--max-grad-norm', type=float, default=5,
                        help='max gradient norm (default: 5)')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--n_epochs', type=int, default=30,
                        help='number of epochs for training agent (default: 30)')
    parser.add_argument('--num-processes', type=int, default=4,
                        help='how many training processes to use (default: 4)')
    parser.add_argument('--num-steps', type=int, default=20,
                        help='number of forward steps in A3C (default: 20)')
    parser.add_argument('--max-episode-length', type=int, default=1000000,
                        help='maximum length of an episode (default: 1000000)')
    parser.add_argument('--name', default='train',
                        help='name of the process')
    parser.add_argument('--logdir', default='log',
                        help='name of logging directory')
    parser.add_argument('--no-shared', default=False,
                        help='use an optimizer without shared momentum.')

    args = parser.parse_args()

    if not os.path.exists(args.logdir):
        os.mkdir(args.logdir)

    shared_model = AdaptiveActorCritic(max_beam_size=max_beam_size,
                                       action_space=3)
    shared_model.share_memory()

    if args.no_shared:
        optimizer = None
    else:
        optimizer = SharedAdam(params=shared_model.parameters(),
                               lr=learning_rate)
        # optimizer = optim.Adam(shared_model.parameters(), lr=learning_rate)
        optimizer.share_memory()

    # --------------------------------------------
    # RL TRAINING
    # --------------------------------------------
    # For German dataset, f_score_index_begin = 5 (because O_INDEX = 4)
    # For toy dataset, f_score_index_begin = 4 (because {0: '<s>', 1: '<e>', 2: '<p>', 3: '<u>', ...})
    f_score_index_begin = 5

    # RL reward coefficient
    reward_coef_fscore = 1
    reward_coef_beam_size = 0.1

    processes = []
    counter = mp.Value('i', 0)
    lock = mp.Lock()

    # eval along with the many RL training processes
    args.name = "val"
    p_val = mp.Process(target=test_adaptive,
                       args=(args.num_processes, machine, max_beam_size,
                             learning_rate, shared_model, counter,
                             val_X, val_Y, index2word, index2label,
                             "val", "log_", "adaptive",
                             initial_beam_size,
                             reward_coef_fscore, reward_coef_beam_size,
                             f_score_index_begin,
                             args))
    p_val.start()
    processes.append(p_val)

    args.name = "test"
    p_test = mp.Process(target=test_adaptive,
                        args=(args.num_processes + 1, machine, max_beam_size,
                              learning_rate, shared_model, counter,
                              test_X, test_Y, index2word, index2label,
                              "test", "log_", "adaptive",
                              initial_beam_size,
                              reward_coef_fscore, reward_coef_beam_size,
                              f_score_index_begin,
                              args))
    p_test.start()
    processes.append(p_test)

    args.name = "train"
    for rank in range(0, args.num_processes):
        p = mp.Process(target=train_adaptive,
                       args=(rank, machine, max_beam_size, learning_rate,
                             shared_model, counter, lock, optimizer,
                             train_X, train_Y, index2word, index2label,
                             "train", "log_", "adaptive",
                             initial_beam_size,
                             reward_coef_fscore, reward_coef_beam_size,
                             f_score_index_begin,
                             args))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

    # =====================================
    print("TESTING w SHARED MODEL")
    processes = []
    counter = mp.Value('i', 0)
    # test for only 1 epoch
    args.n_epochs = 1
    args.name = "final_test"
    p = mp.Process(target=test_adaptive,
                   args=(args.num_processes + 2, machine, max_beam_size,
                         learning_rate, shared_model, counter,
                         test_X, test_Y, index2word, index2label,
                         "test", args.name, "adaptive",
                         initial_beam_size,
                         reward_coef_fscore, reward_coef_beam_size,
                         f_score_index_begin,
                         args))
    p.start()
    processes.append(p)
    for p in processes:
        p.join()
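# The train/test workers above all read from and write to shared_model,
# Hogwild!-style. A minimal sketch of the gradient hand-off a worker typically
# performs before each optimizer step (an assumption based on the standard
# pytorch-a3c pattern; the real logic lives inside train_adaptive):

def ensure_shared_grads(model, shared_model):
    """Point the shared model's .grad tensors at the local model's gradients."""
    for param, shared_param in zip(model.parameters(),
                                   shared_model.parameters()):
        if shared_param.grad is not None:
            return  # already wired up by an earlier update
        shared_param._grad = param.grad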
def main(): """ Train an A3C agent """ os.environ['OMP_NUM_THREADS'] = '1' # Command line arguments parser = argparse.ArgumentParser() parser.add_argument( '--max_timesteps', default=5000000, type=int, help="How many total timesteps to run between all environments") parser.add_argument( '--batch_size', default=20, type=int, help="How many steps to do before reflecting on the batch") parser.add_argument('--env_name', default='PongNoFrameskip-v4', type=str, help="Which environment to train on") parser.add_argument( '--discount_factor', default=0.99, type=float, help=("The disount factor, also called gamma, used for discounting " "future returns")) parser.add_argument('--gae', default=1., type=float, help="Parameter for use in GAE, also called tau") parser.add_argument('--actor_coef', default=1., type=float, help="How much weight to give the actor when updating") parser.add_argument( '--critic_coef', default=0.5, type=float, help="How much weight to give the critic when updating") parser.add_argument('--entropy_coef', default=0.01, type=float, help="How much weight to give entropy when updating") parser.add_argument('--learning_rate', default=0.0001, type=float, help="Optimizer learning rate") parser.add_argument('--no_of_workers', default=16, type=int, help="Number of parallel processes to run") parser.add_argument( '--feature_type', default='cnn', type=str, help="""The feature extractor to use on the network input. Options are: cnn, mlp""") args = parser.parse_args() print(f"Args: {args}") hyperparams = HyperParams(max_timesteps=args.max_timesteps, batch_size=args.batch_size, discount_factor=args.discount_factor, gae=args.gae, actor_coef=args.actor_coef, critic_coef=args.critic_coef, entropy_coef=args.entropy_coef, env_name=args.env_name, learning_rate=args.learning_rate, no_of_workers=args.no_of_workers, feature_type=args.feature_type) # Make temporary directory for logging directory = './runs/{}'.format( datetime.datetime.now().strftime("%Y%m%d-%H%M")) if not os.path.exists(directory): os.makedirs(directory) # Shared model atari = True if hyperparams.feature_type == 'cnn' else False temp_env = create_environment(args.env_name, monitor=False, atari=atari) shared_model = ActorCritic(temp_env.observation_space.shape, temp_env.action_space.n, hyperparams.feature_type) shared_model.share_memory() # Frame counter frame_counter = Value('i') # Optimizer optimizer = SharedAdam(shared_model.parameters(), lr=hyperparams.learning_rate) optimizer.share_memory() # Monitor monitor = Monitor(directory, hyperparams) processes = [] monitor_process = Process(target=monitor.monitor, args=(frame_counter, hyperparams.max_timesteps)) monitor_process.start() processes.append(monitor_process) for i in range(hyperparams.no_of_workers): process = Process(target=train, args=(shared_model, directory, hyperparams, frame_counter, optimizer, monitor.queue, i)) process.start() processes.append(process) # train( # shared_model=shared_model, # directory=directory, # hyperparams=hyperparams, # frame_counter=frame_counter, # optimizer=optimizer, # monitor_queue=monitor.queue, # process_number=0 # ) for process in processes: process.join()