import os
import shutil

import torch
import torch.multiprocessing as _mp

# Project-specific helpers (create_train_env, ActorCritic, GlobalAdam,
# IntrinsicCuriosityModule, local_train, local_test, ...) are assumed to be
# importable from the repo's own modules.


def train(opt):
    torch.manual_seed(123)
    # start from a clean log directory; create the checkpoint directory if needed
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)
    mp = _mp.get_context("spawn")  # unused in this single-process variant
    env, num_states, num_actions = create_train_env(opt.world, opt.stage, opt.action_type)
    global_model = ActorCritic(num_states, num_actions)
    if opt.use_gpu:
        global_model.cuda()
    global_model.share_memory()
    # optionally warm-start from the checkpoint saved for the previous stage
    if opt.load_from_previous_stage:
        if opt.stage == 1:
            previous_world = opt.world - 1
            previous_stage = 4
        else:
            previous_world = opt.world
            previous_stage = opt.stage - 1
        file_ = "{}/a3c_super_mario_bros_{}_{}".format(opt.saved_path, previous_world, previous_stage)
        if os.path.isfile(file_):
            global_model.load_state_dict(torch.load(file_))
    optimizer = GlobalAdam(global_model.parameters(), lr=opt.lr)
    # this variant runs a single training worker in the current process
    local_train(0, opt, global_model, optimizer, True)
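
# Usage sketch (not part of the original source): train(opt) expects an
# argparse-style namespace with the fields read above. The flag names match the
# attributes this variant uses; the default values are illustrative assumptions.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser("A3C for Super Mario Bros")
    parser.add_argument("--world", type=int, default=1)
    parser.add_argument("--stage", type=int, default=1)
    parser.add_argument("--action_type", type=str, default="complex")
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument("--log_path", type=str, default="tensorboard/a3c_super_mario_bros")
    parser.add_argument("--saved_path", type=str, default="trained_models")
    parser.add_argument("--use_gpu", action="store_true")
    parser.add_argument("--load_from_previous_stage", action="store_true")
    train(parser.parse_args())
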
def train(opt):
    torch.manual_seed(123)
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)
    mp = _mp.get_context("spawn")
    # 3 input channels; 90 discrete actions (hard-coded in this variant)
    global_model = ActorCritic(num_inputs=3, num_actions=90)
    global_icm = IntrinsicCuriosityModule(num_inputs=3, num_actions=90)
    if opt.use_gpu:
        global_model.cuda()
        global_icm.cuda()
    global_model.share_memory()
    global_icm.share_memory()
    # a single shared optimizer updates both the actor-critic and the ICM
    optimizer = GlobalAdam(list(global_model.parameters()) + list(global_icm.parameters()), lr=opt.lr)
    processes = []
    for index in range(opt.num_processes):
        if index == 0:
            # worker 0 gets the extra flag (enables saving/logging in local_train)
            process = mp.Process(target=local_train,
                                 args=(index, opt, global_model, global_icm, optimizer, True))
        else:
            process = mp.Process(target=local_train,
                                 args=(index, opt, global_model, global_icm, optimizer))
        process.start()
        processes.append(process)
    for process in processes:
        process.join()
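
# All variants here share optimizer state across worker processes. A minimal
# sketch of what such a GlobalAdam typically looks like (an assumption based on
# the common A3C pattern, not the repo's actual file): the Adam moment buffers
# are created eagerly and moved into shared memory so every worker updates one
# set of statistics. Recent PyTorch releases may expect state["step"] to be a
# tensor rather than an int.
class GlobalAdam(torch.optim.Adam):
    def __init__(self, params, lr):
        super().__init__(params, lr=lr)
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["step"] = 0
                state["exp_avg"] = torch.zeros_like(p.data)
                state["exp_avg_sq"] = torch.zeros_like(p.data)
                # place the moment estimates in shared memory
                state["exp_avg"].share_memory_()
                state["exp_avg_sq"].share_memory_()
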
def train(opt):
    torch.manual_seed(123)
    # namespace all outputs under the experiment name
    opt.log_path = opt.log_path + "/" + opt.exp
    opt.saved_path = opt.saved_path + "/" + opt.exp
    opt.output_path = opt.output_path + "/" + opt.exp
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    os.makedirs(opt.saved_path, exist_ok=True)  # ensure the per-experiment checkpoint dir exists
    mp = _mp.get_context("spawn")
    global_model = ActorCritic(num_inputs=3, num_actions=opt.num_actions)
    global_icm = IntrinsicCuriosityModule(num_inputs=3, num_actions=opt.num_actions)
    if opt.resume_path:
        print("Load model from checkpoint: {}".format(opt.resume_path))
        global_model.load_state_dict(torch.load("{}/a3c".format(opt.resume_path)))
        global_icm.load_state_dict(torch.load("{}/icm".format(opt.resume_path)))
    if opt.use_gpu:
        global_model.cuda()
        global_icm.cuda()
    global_model.share_memory()
    global_icm.share_memory()
    optimizer = GlobalAdam(list(global_model.parameters()) + list(global_icm.parameters()), lr=opt.lr)
    processes = []
    for index in range(opt.num_processes):
        if index == 0:
            process = mp.Process(target=local_train,
                                 args=(index, opt, global_model, global_icm, optimizer, True))
        else:
            process = mp.Process(target=local_train,
                                 args=(index, opt, global_model, global_icm, optimizer))
        process.start()
        processes.append(process)
    for process in processes:
        process.join()
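
# For the resume logic above to work, the training workers must write
# checkpoints under the same names that train() later loads. A hypothetical
# sketch of the matching save step (the real code lives inside local_train,
# which is not shown here):
def save_checkpoint(global_model, global_icm, saved_path):
    # file names mirror the torch.load() calls in the resume branch above
    torch.save(global_model.state_dict(), "{}/a3c".format(saved_path))
    torch.save(global_icm.state_dict(), "{}/icm".format(saved_path))
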
def train(opt):
    torch.manual_seed(SEED)  # SEED: module-level constant (123 in the other variants)
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)
    mp = _mp.get_context("spawn")
    env, num_states, num_actions = create_train_env(opt.world, opt.stage, opt.action_type)
    global_model = ActorCritic(num_states, num_actions)
    global_model.share_memory()
    if opt.load_from_previous_stage:
        if opt.stage == 1:
            previous_world = opt.world - 1
            previous_stage = 4
        else:
            previous_world = opt.world
            previous_stage = opt.stage - 1
        file_ = f"{opt.saved_path}/a3c_super_mario_bros_{previous_world}_{previous_stage}"
        if os.path.isfile(file_):
            global_model.load_state_dict(torch.load(file_))
    optimizer = GlobalAdam(global_model.parameters(), lr=opt.lr)
    processes = []
    for index in range(opt.num_processes):
        if index == 0:
            process = mp.Process(target=local_train, args=(index, opt, global_model, optimizer, True))
        else:
            process = mp.Process(target=local_train, args=(index, opt, global_model, optimizer))
        process.start()
        processes.append(process)
    # one extra evaluation process runs alongside the trainers
    process = mp.Process(target=local_test, args=(opt.num_processes, opt, global_model))
    process.start()
    processes.append(process)
    for process in processes:
        process.join()
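
# The extra process above evaluates while training runs. A minimal sketch of
# what such a local_test worker usually does (hypothetical; the real local_test
# is defined elsewhere in the repo, and this assumes a feed-forward ActorCritic
# whose forward returns (logits, value)): periodically pull the shared weights,
# then act greedily in its own environment.
def local_test_sketch(index, opt, global_model):
    torch.manual_seed(123 + index)
    env, num_states, num_actions = create_train_env(opt.world, opt.stage, opt.action_type)
    local_model = ActorCritic(num_states, num_actions)
    local_model.eval()
    done = True
    while True:
        if done:
            # sync the latest weights and start a fresh episode
            local_model.load_state_dict(global_model.state_dict())
            state = torch.from_numpy(env.reset())
        with torch.no_grad():
            logits, value = local_model(state.unsqueeze(0))  # adjust if env already batches
        action = torch.argmax(logits).item()
        state, reward, done, info = env.step(action)
        state = torch.from_numpy(state)
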
def shared_learn(args):
    os.environ['OMP_NUM_THREADS'] = '1'
    torch.manual_seed(123)

    # create paths for logs and saved models
    if os.path.isdir(args.sum_path):
        shutil.rmtree(args.sum_path)
    os.makedirs(args.sum_path)
    if not os.path.isdir(args.trained_models_path):
        os.makedirs(args.trained_models_path)
    mp = _mp.get_context('spawn')

    # create initial mario environment
    env, num_states, num_actions = build_environment(args.world, args.stage)
    print('Num of states: {}'.format(num_states))    # 4
    print('Environment: {}'.format(env))
    print('Num of actions: {}'.format(num_actions))  # 12

    # use CUDA if requested and available, else CPU
    device = torch.device('cuda' if (args.use_cuda and torch.cuda.is_available()) else 'cpu')
    CAE_shared_model = Convolutional_AutoEncoder()  # .to(device)
    A3C_shared_model = ActorCritic(num_states, num_actions)  # .to(device)

    # when starting a new stage, pick up the previously saved model
    # (argument order fixed: the path fills the first placeholder)
    if args.new_stage:
        A3C_shared_model.load_state_dict(
            torch.load('{}/a3c_super_mario_bros_{}_{}_enc2'.format(
                args.trained_models_path, args.world, args.stage)))
        A3C_shared_model.eval()

    # GPU check
    if args.use_cuda and torch.cuda.is_available():
        A3C_shared_model.cuda()
        CAE_shared_model.cuda()

    # share memory with worker instances
    CAE_shared_model.share_memory()
    A3C_shared_model.share_memory()
    print('A3C')
    print(A3C_shared_model)

    # initialize optimizers
    optimizer_cae = CAE_shared_model.createLossAndOptimizer(CAE_shared_model, 0.001)
    optimizer_a3c = SharedAdam(A3C_shared_model.parameters(), lr=args.lr)
    # optimizer.share_memory()

    # start one training worker per process; only worker 0 gets the save flag
    workers = []
    for rank in range(args.num_processes):
        if rank == 0:
            worker = mp.Process(target=train_a3c,
                                args=(rank, args, optimizer_a3c, A3C_shared_model,
                                      CAE_shared_model, optimizer_cae, True))
        else:
            worker = mp.Process(target=train_a3c,
                                args=(rank, args, optimizer_a3c, A3C_shared_model,
                                      CAE_shared_model, optimizer_cae))
        worker.start()
        workers.append(worker)  # fixed: was worker.append(worker)

    # test worker (rank placed after the trainers, as in the variant above)
    worker = mp.Process(target=test_a3c,
                        args=(args.num_processes, args, A3C_shared_model, CAE_shared_model))
    worker.start()
    workers.append(worker)

    # join all processes
    for worker in workers:
        worker.join()
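
# Note on the "spawn" start method used by the launchers above: spawned children
# re-import the main module, so the entry call must sit behind a __main__ guard,
# otherwise every worker would recursively try to launch training itself.
# get_args() is a hypothetical stand-in for whichever argument parser a given
# variant uses.
if __name__ == '__main__':
    args = get_args()  # hypothetical helper returning the argparse/opt namespace
    shared_learn(args)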