    for i in range(args.total_classes)
]

# Declare train and test datasets
train_set = None
test_set = iDataset(args, mean_image, data_generators=[],
                    max_data_size=max_test_data_size, job='test')

# Condition variable and shared memory for synchronization
cond_var = mp.Condition()
train_counter = mp.Value("i", 0)
test_counter = mp.Value("i", 0)
dataQueue = mp.Queue()
all_done = mp.Event()
data_mgr = mp.Manager()
expanded_classes = data_mgr.list([None for i in range(args.test_freq)])

if args.resume:
    print("resuming model from %s-model.pth.tar"
          % os.path.splitext(args.outfile)[0])
    model = torch.load("%s-model.pth.tar" % os.path.splitext(args.outfile)[0],
                       map_location=lambda storage, loc: storage)
    model.device = train_device
    model.exemplar_means = []
    model.compute_means = True
def main_eval(args, create_shared_model, init_agent):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if args.gpu_ids == -1:
        args.gpu_ids = [-1]
    else:
        torch.cuda.manual_seed(args.seed)

    try:
        mp.set_start_method("spawn")
    except RuntimeError:
        pass

    model_to_open = args.load_model

    processes = []
    res_queue = mp.Queue()

    if args.model == "BaseModel" or args.model == "GCN":
        args.learned_loss = False
        args.num_steps = 50
        target = nonadaptivea3c_val
    else:
        args.learned_loss = True
        args.num_steps = 6
        target = savn_val

    rank = 0
    args.scene_types = ['living_room']
    for scene_type in args.scene_types:
        p = mp.Process(
            target=target,
            args=(
                rank,
                args,
                model_to_open,
                create_shared_model,
                init_agent,
                res_queue,
                250,
                scene_type,
            ),
        )
        p.start()
        processes.append(p)
        time.sleep(0.1)
        rank += 1

    count = 0
    end_count = 0
    train_scalars = ScalarMeanTracker()

    proc = len(args.scene_types)
    pbar = tqdm(total=250 * proc)

    try:
        while end_count < proc:
            train_result = res_queue.get()
            pbar.update(1)
            count += 1
            if "END" in train_result:
                end_count += 1
                continue
            train_scalars.add_scalars(train_result)
        tracked_means = train_scalars.pop_and_reset()
    finally:
        for p in processes:
            time.sleep(0.1)
            p.join()

    with open(args.results_json, "w") as fp:
        json.dump(tracked_means, fp, sort_keys=True, indent=4)
            self.gnet.save_trans((s, a, r, s_next, done_flag))
            s = s_next
            if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                self.gnet.train_net(gamma=self.gamma, ac_net=self.lnet)
                if done:
                    self.res_queue.put(score)
                    Total_epoch += 1
                    print("Epoch:{} score:{}".format(Total_epoch, score))
                    break
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)
    gnet.share_memory()
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training
    workers = [Worker(gnet, None, global_ep, global_ep_r, res_queue, i, gamma=GAMMA)
               for i in range(mp.cpu_count())]
    [w.start() for w in workers]

    res = []
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
        else:
            break
    [w.join() for w in workers]

    import matplotlib.pyplot as plt
    plt.plot(res)
    plt.show()  # without show() the reward curve is never rendered
shared_state["targetQ"].load_state_dict(targetQ.state_dict()) for i in range(max_id): shared_state["update"][i] = True if block == False: return 0 except Exception as e: print(e) if __name__ == '__main__': os.system('cls') vis.close() num_processes = 2 shared_queue = mp.Queue() shared_state = dict() shared_state["mainQ"] = DQN(s_dim, a_dim, dev).share_memory() shared_state["targetQ"] = DQN(s_dim, a_dim, dev).share_memory() shared_state["update"] = mp.Array('i', [0 for i in range(num_processes)]) # shared_state["wait"] = mp.Array('i', [0 for i in range(num_processes)]) shared_state["vis"] = mp.Value('i', 0) shared_state["wait"] = mp.Value('i', 0) shared_state["wait"].value = start_frame // 10 # for i in range(100): # actor_process(0,num_frames,shared_state,shared_queue,False) # actor_process(0,num_frames,shared_state,shared_queue,False) # learner_process(1,num_frames,shared_state,shared_queue,False)
f.write("model: " + model_name + "\n") print("using model {}".format(model_name)) tgt_net = ptan.agent.TargetNet(net) tm_net = dqn_model.TMPredict(env.observation_space.spaces['image'].shape, env.observation_space.spaces['logic'].nvec, env.action_space.n).to(device) buffer = ptan.experience.ExperienceReplayBuffer(experience_source=None, buffer_size=params['replay_size']) # optimizer = optim.Adam(net.parameters(), lr=params['learning_rate']) optimizer_tm = optim.Adam(tm_net.parameters(), lr=params['learning_rate']) optimizer = optim.RMSprop(net.parameters(), lr=params['learning_rate'], momentum=0.95, eps=0.01) exp_queue = mp.Queue(maxsize=PLAY_STEPS * 2) if args.fsa: fsa_nvec = env.observation_space.spaces['logic'].nvec logic_dim = int(fsa_nvec.shape[0] / env.observation_space.spaces['image'].shape[0]) fsa_nvec = fsa_nvec[-logic_dim:] play_proc = mp.Process(target=play_func, args=(params, net, args.cuda, args.fsa, exp_queue, fsa_nvec)) else: play_proc = mp.Process(target=play_func, args=(params, net, args.cuda, args.fsa, exp_queue)) play_proc.start() frame_idx = 0
def main():
    setproctitle.setproctitle("Train/Test Manager")
    args = flag_parser.parse_arguments()

    if args.model == "BaseModel" or args.model == "GCN":
        args.learned_loss = False
        args.num_steps = 50
        target = nonadaptivea3c_val if args.eval else nonadaptivea3c_train
    else:
        args.learned_loss = True
        args.num_steps = 6
        target = savn_val if args.eval else savn_train

    create_shared_model = model_class(args.model)
    init_agent = agent_class(args.agent_type)
    optimizer_type = optimizer_class(args.optimizer)

    if args.eval:
        main_eval(args, create_shared_model, init_agent)
        return

    start_time = time.time()
    local_start_time_str = time.strftime("%Y-%m-%d_%H:%M:%S",
                                         time.localtime(start_time))

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if args.log_dir is not None:
        tb_log_dir = args.log_dir + "/" + args.title + "-" + local_start_time_str
        log_writer = SummaryWriter(log_dir=tb_log_dir)
    else:
        log_writer = SummaryWriter(comment=args.title)

    if args.gpu_ids == -1:
        args.gpu_ids = [-1]
    else:
        torch.cuda.manual_seed(args.seed)
        mp.set_start_method("spawn")

    shared_model = create_shared_model(args)

    train_total_ep = 0
    n_frames = 0

    if shared_model is not None:
        shared_model.share_memory()
        optimizer = optimizer_type(
            filter(lambda p: p.requires_grad, shared_model.parameters()), args)
        optimizer.share_memory()
        print(shared_model)
    else:
        assert (args.agent_type == "RandomNavigationAgent"
                ), "The model is None but agent is not random agent"
        optimizer = None

    processes = []

    print('Start Loading!')
    optimal_action_path = './data/AI2thor_Combine_Dataset/Optimal_Path_Combine.json'
    with open(optimal_action_path, 'r') as read_file:
        optimal_action_dict = json.load(read_file)
    manager = Manager()
    optimal_action = manager.dict()
    optimal_action.update(optimal_action_dict)

    glove_file_path = './data/AI2thor_Combine_Dataset/det_feature_512_train.hdf5'
    glove_file = hdf5_to_dict(glove_file_path)
    # det_gt_path = './data/AI2thor_Combine_Dataset/Instance_Detection_Combine.pkl'
    # with open(det_gt_path, 'rb') as read_file:
    #     det_gt = pickle.load(read_file)
    print('Loading Success!')

    end_flag = mp.Value(ctypes.c_bool, False)
    train_res_queue = mp.Queue()

    for rank in range(0, args.workers):
        p = mp.Process(
            target=target,
            args=(
                rank,
                args,
                create_shared_model,
                shared_model,
                init_agent,
                optimizer,
                train_res_queue,
                end_flag,
                glove_file,
                optimal_action,
                # det_gt,
            ),
        )
        p.start()
        processes.append(p)
        time.sleep(0.1)

    print("Train agents created.")

    train_thin = args.train_thin
    train_scalars = ScalarMeanTracker()

    # start_ep_time = time.time()

    try:
        while train_total_ep < args.max_ep:
            train_result = train_res_queue.get()
            train_scalars.add_scalars(train_result)
            train_total_ep += 1
            n_frames += train_result["ep_length"]
            # if train_total_ep % 10 == 0:
            #     print(n_frames / train_total_ep)
            #     print((time.time() - start_ep_time) / train_total_ep)

            if (train_total_ep % train_thin) == 0:
                log_writer.add_scalar("n_frames", n_frames, train_total_ep)
                tracked_means = train_scalars.pop_and_reset()
                for k in tracked_means:
                    log_writer.add_scalar(k + "/train", tracked_means[k],
                                          train_total_ep)

            if (train_total_ep % args.ep_save_freq) == 0:
                print(n_frames)
                if not os.path.exists(args.save_model_dir):
                    os.makedirs(args.save_model_dir)
                state_to_save = shared_model.state_dict()
                save_path = os.path.join(
                    args.save_model_dir,
                    "{0}_{1}_{2}_{3}.dat".format(args.title, n_frames,
                                                 train_total_ep,
                                                 local_start_time_str),
                )
                torch.save(state_to_save, save_path)
    finally:
        log_writer.close()
        end_flag.value = True
        for p in processes:
            time.sleep(0.1)
            p.join()
                    record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                    break
                s = s_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)        # global network
    gnet.share_memory()         # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=1e-4, betas=(0.92, 0.999))  # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training
    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
        for i in range(mp.cpu_count())
    ]
    [w.start() for w in workers]

    res = []  # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
        else:
            break
    [w.join() for w in workers]
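
# NOTE (added sketch): `record` is not shown in this excerpt. A typical
# implementation in this A3C pattern, matching the arguments passed above,
# bumps the shared episode counter under its lock, smooths the shared reward,
# and forwards it to res_queue for the plotting loop:
def record(global_ep, global_ep_r, ep_r, res_queue, name):
    with global_ep.get_lock():
        global_ep.value += 1
    with global_ep_r.get_lock():
        if global_ep_r.value == 0.:
            global_ep_r.value = ep_r
        else:
            # exponential moving average of the episode reward
            global_ep_r.value = global_ep_r.value * 0.99 + ep_r * 0.01
    res_queue.put(global_ep_r.value)
    print(name, "Ep:", global_ep.value, "| Ep_r: %.0f" % global_ep_r.value)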
                if done:  # done: print information
                    record(self.g_ep, self.g_ep_r, ep_r, self.res_queue,
                           self.name, self.gnet, self.global_record)
                    break
                s = s_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)  # global network
    # gnet = torch.load("./data/model.pt")
    gnet.share_memory()   # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=0.0001)  # global optimizer
    global_ep, global_ep_r, res_queue, global_record = (
        mp.Value('i', 0), mp.Value('d', 0.), mp.Queue(), mp.Value('d', -100.))

    # parallel training
    # workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
    #            for i in range(mp.cpu_count())]
    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i, global_record)
        for i in range(6)
    ]
    [w.start() for w in workers]

    res = []  # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
        else:
            break
    def _test_empty_tensor_sharing(self, dtype, device):
        q = mp.Queue()
        empty = torch.tensor([], dtype=dtype, device=device)
        q.put(empty)
        out = q.get(timeout=1)
        self.assertEqual(out, empty)
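
# NOTE (added example): a minimal standalone version of the same round-trip,
# assuming the spawn start method. torch.multiprocessing registers custom
# reducers so the tensor travels through shared memory rather than an
# element-by-element pickle.
import torch
import torch.multiprocessing as mp

def _producer(q):
    q.put(torch.arange(4, dtype=torch.float32))

if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    q = mp.Queue()
    p = mp.Process(target=_producer, args=(q,))
    p.start()
    print(q.get(timeout=10))  # tensor([0., 1., 2., 3.]), backed by shared memory
    p.join()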
    for i in range(population_size):
        agent = AACAgent(obs_space.shape[0], act_space.shape[0])
        hparams = Hparams(
            make_int_range(a_param),
            make_int_range(c_param),
            make_int_range(k_param),
            make_float_range(h_param),
            make_float_range(g_param),
        )
        member = Member(i, agent, hparams)
        population.append(member)

    # Moving the replay buffer to shared memory allows us to share it among
    # workers without copying.
    replay_buffer.share_memory_()

    # A separate queue is created for each member to ensure the correct member
    # is sent to each worker.
    member_queues = {m.id: mp.Queue() for m in population}
    # Queue for sharing collected experiences.
    exp_queue = mp.Queue()
    # Events for synchronization. Two different events are used to avoid
    # possible race conditions. Specifically, we need to clear each event
    # before reusing it, but we don't want to clear it too early.
    step_events = (mp.Event(), mp.Event())
    epoch_events = (mp.Event(), mp.Event())

    num_gpus = torch.cuda.device_count()

    # Initialize workers.
    workers = [
        Worker(
            i,
            make_env_function,
            max_episode_steps,
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    # Wrapper to distribute load on multiple GPUs
    tcn = torch.nn.DataParallel(tcn, device_ids=range(torch.cuda.device_count()))
    # load labeling network
    attribute_classifier = DenseClassifier(num_classes=5).to(device)

    triplet_builder = builder(args.n_views, args.train_directory,
                              args.labels_train_directory, IMAGE_SIZE, args,
                              sample_size=32)

    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(
        target=build_set, args=(queue, triplet_builder, logger), daemon=True)
    dataset_builder_process.start()

    optimizer = optim.SGD(list(tcn.parameters()) + list(attribute_classifier.parameters()),
                          lr=args.lr_start, momentum=0.9)
    # This will diminish the learning rate at the milestones: 0.1, 0.01, 0.001.
    learning_rate_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[100, 200, 500], gamma=0.1)

    criterion = nn.CrossEntropyLoss()

    trn_losses_ = []
    val_losses_ = []
    val_acc_margin_ = []
    val_acc_no_margin_ = []

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        losses = []
        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(
            epoch, learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,  # batch_size(epoch, args.max_minibatch_size),
            shuffle=True,
            pin_memory=use_cuda,
        )

        for _ in range(0, ITERATE_OVER_TRIPLETS):
            for i, minibatch in enumerate(data_loader):
                frames = minibatch[0]
                captions = minibatch[1]
                if use_cuda:
                    frames = frames.cuda()
                    captions = captions.to(device)
                print(captions)
                print(len(data_loader))

                anchor_frames = frames[:, 0, :, :, :]
                positive_frames = frames[:, 1, :, :, :]
                negative_frames = frames[:, 2, :, :, :]

                anchor_output, unnormalized, mixed = tcn(anchor_frames)
                positive_output, _, _ = tcn(positive_frames)
                negative_output, _, _ = tcn(negative_frames)

                d_positive = distance(anchor_output, positive_output)
                d_negative = distance(anchor_output, negative_output)

                # features = encoder(anchor_frames)
                loss_triplet = torch.clamp(
                    args.margin + d_positive - d_negative, min=0.0).mean()

                label_outputs_1, label_outputs_2 = attribute_classifier(mixed)
                labels_1 = captions[:, 0]
                # labels_2 = captions[:, 1]
                loss_1 = criterion(label_outputs_1, labels_1)
                # loss_2 = criterion(label_outputs_2, labels_2)
                loss_language = loss_1  # + loss_2

                # loss = loss_triplet + args.alpha * loss_language
                loss = loss_language
                # loss = loss_triplet
                losses.append(loss.data.cpu().numpy())

                tcn.zero_grad()
                attribute_classifier.zero_grad()
                loss.backward()
                optimizer.step()

        trn_losses_.append(np.mean(losses))
        logger.info('train loss: {}'.format(np.mean(losses)))

        if epoch % 1 == 0:
            acc_margin, acc_no_margin, loss = validate(
                tcn, attribute_classifier, criterion, use_cuda, args)
            val_losses_.append(loss)
            val_acc_margin_.append(acc_margin)
            val_acc_no_margin_.append(acc_no_margin)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)

        plot_mean(trn_losses_, args.model_folder, 'train_loss')
        plot_mean(val_losses_, args.model_folder, 'validation_loss')
        # plot_mean(train_acc_, args.model_folder, 'train_acc')
        plot_mean(val_acc_margin_, args.model_folder, 'validation_accuracy_margin')
        plot_mean(val_acc_no_margin_, args.model_folder, 'validation_accuracy_no_margin')
        gamma=args.lr_critic_gamma)

    # training log
    logger = Logger.Logger(
        path=os.path.join(path_results_folder, "training.csv"),
        column_names=[
            "time", "iterations", "episodes", "training reward",
            "validation reward", "validation reward std",
            "validation reward min", "validation reward max",
            "loss policy", "loss critic", "sigma"
        ])

    # create trainer workers
    rollouts = mp.Queue()
    flag_close = mp.Value("i", 0)
    l1_locks = [mp.Lock() for i in range(args.num_workers)]
    l2_locks = [mp.Lock() for i in range(args.num_workers)]
    for lock in l1_locks:
        lock.acquire()
    processes = []
    for i in range(args.num_workers):
        print("(main) creating worker process number %d" % i)
        p = mp.Process(target=a2c_worker,
                       args=(i, l1_locks[i], l2_locks[i], flag_close,
                             rollouts, net_policy, net_critic, args))
        p.start()
        processes.append(p)
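
# NOTE (added sketch, speculative): the worker side of this lock handshake is
# not shown. One plausible reading is that each worker blocks on its
# pre-acquired l1 lock until the main process releases it, and holds l2 while
# busy so the main process can acquire l2 to detect completion.
# `collect_rollout` is an invented placeholder.
def a2c_worker(rank, l1_lock, l2_lock, flag_close, rollouts,
               net_policy, net_critic, args):
    while not flag_close.value:
        l1_lock.acquire()  # main pre-acquired l1, so this blocks until released
        l2_lock.acquire()  # mark this worker as busy
        rollout = collect_rollout(net_policy, net_critic, args)  # placeholder
        rollouts.put(rollout)
        l2_lock.release()  # main can now acquire l2 to confirm the rollout is queued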
def main():
    print('Starting.')
    setproctitle.setproctitle('A3C Manager')
    args = flag_parser.parse_arguments()

    create_shared_model = model.Model
    init_agent = agent.A3CAgent
    optimizer_type = optimizer_class(args.optimizer)

    start_time = time.time()
    local_start_time_str = time.strftime("%Y-%m-%d_%H:%M:%S",
                                         time.localtime(start_time))

    # Seed sources of randomness.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if args.enable_logging:
        from tensorboardX import SummaryWriter
        log_dir = 'runs/' + args.title + '-' + local_start_time_str
        log_writer = SummaryWriter(log_dir=log_dir)

    if args.gpu_ids == -1:
        args.gpu_ids = [-1]
    else:
        torch.cuda.manual_seed(args.seed)
        mp.set_start_method('spawn', force=True)

    print('=> Creating the shared model and optimizer.')
    shared_model = create_shared_model(args)
    shared_model.share_memory()
    optimizer = optimizer_type(
        filter(lambda p: p.requires_grad, shared_model.parameters()), args)
    optimizer.share_memory()

    if args.resume:
        shared_model.load_state_dict(torch.load('./models/last_model'))
    elif args.load_model != '':
        shared_model.load_state_dict(torch.load(args.load_model))

    print('=> Creating the agents.')
    processes = []
    end_flag = mp.Value(ctypes.c_bool, False)
    train_res_queue = mp.Queue()

    for rank in range(0, args.workers):
        p = mp.Process(target=train.train,
                       args=(rank, args, create_shared_model, shared_model,
                             init_agent, optimizer, train_res_queue, end_flag))
        p.start()
        processes.append(p)
        print('* Agent created.')
        time.sleep(0.1)

    train_total_ep = 0
    n_frames = 0
    train_thin = args.train_thin
    train_scalars = ScalarMeanTracker()
    success_tracker = []

    try:
        while train_total_ep < args.num_train_episodes:
            train_result = train_res_queue.get()
            train_scalars.add_scalars(train_result)
            train_total_ep += 1
            n_frames += train_result["ep_length"]

            if train_total_ep % 100 == 0:
                torch.save(shared_model.state_dict(),
                           './models/model_{}'.format(train_total_ep))

            if args.enable_logging and train_total_ep % train_thin == 0:
                log_writer.add_scalar("n_frames", n_frames, train_total_ep)
                tracked_means = train_scalars.pop_and_reset()
                for k in tracked_means:
                    log_writer.add_scalar(k + "/train", tracked_means[k],
                                          train_total_ep)

            # Stop early once the rolling success rate over the last 100
            # episodes exceeds the training threshold.
            success_tracker.append(train_result["success"])
            if len(success_tracker) > 100:
                success_tracker.pop(0)
            if (len(success_tracker) >= 100 and
                    sum(success_tracker) / len(success_tracker) > args.train_threshold):
                break
    finally:
        if args.enable_logging:
            log_writer.close()
        end_flag.value = True
        for p in processes:
            time.sleep(0.1)
            p.join()

    torch.save(shared_model.state_dict(), './models/last_model')
    def __init__(self, input_source, detector, cfg, opt, mode='image',
                 batchSize=1, queueSize=128):
        self.cfg = cfg
        self.opt = opt
        self.mode = mode
        self.device = opt.device

        if mode == 'image':
            self.img_dir = opt.inputpath
            self.imglist = [
                os.path.join(self.img_dir, im_name.rstrip('\n').rstrip('\r'))
                for im_name in input_source
            ]
            self.datalen = len(input_source)
        elif mode == 'video':
            stream = cv2.VideoCapture(input_source)
            assert stream.isOpened(), 'Cannot capture source'
            self.path = input_source
            self.datalen = int(stream.get(cv2.CAP_PROP_FRAME_COUNT))
            self.fourcc = int(stream.get(cv2.CAP_PROP_FOURCC))
            self.fps = stream.get(cv2.CAP_PROP_FPS)
            self.frameSize = (int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                              int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            self.videoinfo = {
                'fourcc': self.fourcc,
                'fps': self.fps,
                'frameSize': self.frameSize
            }
            stream.release()

        self.detector = detector
        self.batchSize = batchSize
        leftover = 0
        if self.datalen % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
        self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE
        self._sigma = cfg.DATA_PRESET.SIGMA

        if cfg.DATA_PRESET.TYPE == 'simple':
            self.transformation = SimpleTransform(
                self, scale_factor=0,
                input_size=self._input_size,
                output_size=self._output_size,
                rot=0, sigma=self._sigma,
                train=False, add_dpg=False, gpu_device=self.device)

        # initialize the queues used to store data
        """
        image_queue: the buffer storing pre-processed images for object detection
        det_queue: the buffer storing human detection results
        pose_queue: the buffer storing post-processed cropped human images for pose estimation
        """
        if opt.sp:
            self._stopped = False
            self.image_queue = Queue(maxsize=queueSize)
            self.det_queue = Queue(maxsize=10 * queueSize)
            self.pose_queue = Queue(maxsize=10 * queueSize)
        else:
            self._stopped = mp.Value('b', False)
            self.image_queue = mp.Queue(maxsize=queueSize)
            self.det_queue = mp.Queue(maxsize=10 * queueSize)
            self.pose_queue = mp.Queue(maxsize=10 * queueSize)
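
    # NOTE (added sketch): the three queues above form an image -> detection ->
    # pose pipeline. A generic shape for one stage under that reading (the
    # stage function and None-sentinel convention are assumptions, not this
    # repo's API):
    @staticmethod
    def _pipeline_stage(in_queue, out_queue, work_fn):
        # Consume from one buffer, transform, push downstream; a None sentinel
        # shuts the chain down in order.
        while True:
            item = in_queue.get()
            if item is None:
                out_queue.put(None)
                break
            out_queue.put(work_fn(item))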
def test(cfg):
    if cfg.ckpt is not None:
        if not os.path.exists(cfg.ckpt):
            print('Invalid ckpt path:', cfg.ckpt)
            exit(1)
        ckpt = torch.load(cfg.ckpt, map_location=lambda storage, loc: storage)
        print(cfg.ckpt, 'loaded')

        loaded_cfg = ckpt['cfg'].__dict__
        del loaded_cfg['num_workers']
        del loaded_cfg['test_set']
        del loaded_cfg['log_dir']
        del loaded_cfg['prediction_file']
        del loaded_cfg['num_episodes']
        del loaded_cfg['use_pretrain']
        del loaded_cfg['memory_num']
        del loaded_cfg['memory_len']
        del loaded_cfg['prepro_dir']
        del loaded_cfg['debug']

        cfg.__dict__.update(loaded_cfg)
        cfg.model = cfg.model.upper()
        print('Merged Config')
        pprint(cfg.__dict__)

        os.makedirs(cfg.log_dir)
        model = create_a3c_model(cfg)
        model.load_state_dict(ckpt['model'])
    else:
        os.makedirs(cfg.log_dir)
        model = create_a3c_model(cfg)

        print("LOAD pretrain parameter for BERT from ./pretrain/pytorch_model.bin...")
        pretrain_param = torch.load('./pretrain/pytorch_model.bin',
                                    map_location=lambda storage, loc: storage)

        missing_keys = []
        unexpected_keys = []
        error_msgs = []

        # Re-key the pretrained parameters under the 'model.' prefix.
        new_pretrain_param = pretrain_param.copy()
        for k, v in pretrain_param.items():
            new_key = 'model.' + k
            new_pretrain_param[new_key] = v
            del new_pretrain_param[k]
        pretrain_param = new_pretrain_param.copy()

        metadata = getattr(pretrain_param, '_metadata', None)
        if metadata is not None:
            pretrain_param._metadata = metadata

        def load(module, prefix=''):
            local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
            module._load_from_state_dict(pretrain_param, prefix, local_metadata,
                                         True, missing_keys, unexpected_keys,
                                         error_msgs)
            for name, child in module._modules.items():
                if child is not None:
                    load(child, prefix + name + '.')

        load(model, prefix='')
        print("Weights of {} not initialized from pretrained model: {}".format(
            model.__class__.__name__, missing_keys))
        print("Weights from pretrained model not used in {}: {}".format(
            model.__class__.__name__, unexpected_keys))

    tokenizer = BertTokenizer.from_pretrained(cfg.bert_model)

    env = Environment(cfg, cfg.test_set, tokenizer, shuffle=False)
    print(env.dataset.path, 'loaded')

    queue = mp.Queue()
    procs = []
    for i in range(cfg.num_workers):
        p = TestWorker(cfg, i, model, env, queue, tokenizer)
        if cfg.debug:
            p.run()
        else:
            p.start()
        procs.append(p)

    # Drain the result queue while the workers are alive.
    results = []
    for p in procs:
        while True:
            running = p.is_alive()
            if not queue.empty():
                results.append(queue.get())
            else:
                if not running:
                    break

    for p in procs:
        p.join()

    exact_list = []
    f1_list = []
    full_action = [0 for _ in range(cfg.memory_num)]
    full_solvable = []
    id_list = []
    for i in range(len(results)):
        id_list.append(results[i]['doc'])
        full_solvable.append(results[i]['solvable'])
        exact_list.append(results[i]['exact'])
        f1_list.append(results[i]['f1'])
        for j in range(cfg.memory_num):
            full_action[j] += results[i]['actions'][j]

    # Keep the highest-scoring answer per question id.
    qa_list = list(set(['_'.join(doc_id.split('_')[:-1]) for doc_id in id_list]))
    answers = dict()
    for qa_id in qa_list:
        answers[qa_id] = ('', -100000000)
    for i in range(len(results)):
        qa_id = '_'.join(id_list[i].split('_')[:-1])
        score = results[i]['score']
        answer = results[i]['answer']
        if answers[qa_id][1] < score:
            answers[qa_id] = (answer, score)
    for qa_id in answers.keys():
        answers[qa_id] = answers[qa_id][0]

    key_list = list(set(answers.keys()))
    solvables = [[] for i in range(len(key_list))]
    for i in range(len(full_solvable)):
        id_ = '_'.join(id_list[i].split('_')[:-1])
        solv = full_solvable[i]
        idx = key_list.index(id_)
        solvables[idx].append(solv)
    for i in range(len(solvables)):
        if 1 in solvables[i]:
            solvables[i] = 1
        else:
            solvables[i] = 0
    with open(cfg.prediction_file, 'w', encoding='utf-8') as f:
        print(json.dumps(answers), file=f)

    results = get_score_from_trivia(cfg, cfg.test_set)
    exact = results['exact_match']
    f1 = results['f1']

    total_action_num = 0
    for i in range(cfg.memory_num):
        total_action_num += full_action[i]
    avg_action = [0 for _ in range(cfg.memory_num)]
    for i in range(cfg.memory_num):
        avg_action[i] += full_action[i] / total_action_num

    print('All processes are finished.')
    print('ExactMatch: %.2f' % (sum(exact_list) / len(exact_list) * 100))
    print('F1score: %.2f' % (sum(f1_list) / len(f1_list) * 100))
    print()
    print('ExactMatch: %.2f' % (exact * 100))
    print('F1score: %.2f' % (f1 * 100))
    print()
    print('Solvables: %.2f' % (sum(full_solvable) / len(full_solvable) * 100))
    print('Non duplicated Solvables: %.2f' % (sum(solvables) / len(solvables) * 100))
    print()
    print('Total number of actions: %d' % total_action_num)
    for i in range(cfg.memory_num):
        print('Action %d : %.2f' % (i, avg_action[i] * 100))
if __name__ == "__main__": env = gym.make(ENV_NAME) #env.seed(2) MPS = 2 # meta population size meta_population = [Model(env.observation_space.shape[0],env.action_space.n, idx=i) for i in range(MPS)] # create arcive for models archive = [] writer = SummaryWriter() iterations = 300 #1500 # max iterations to run delta_reward_buffer = deque(maxlen=10) # buffer to store the reward gradients to see if rewards stay constant over a defined time horizont ~> local min W = 1 params_queues = [mp.Queue(maxsize=1) for _ in range(PROCESSES_COUNT)] rewards_queue = mp.Queue(maxsize=ITERS_PER_UPDATE) workers = [] for idx, params_queue in enumerate(params_queues): proc = mp.Process(target=worker_func, args=(idx, params_queue, rewards_queue, NOISE_STD)) proc.start() workers.append(proc) print("All started!") step_idx = 0 reward_history = [] reward_max =[] reward_min = [] reward_std = []
import time

import torch
import torch.nn as nn
import torch.multiprocessing as mp
from torch.optim import Adam
from tqdm import tqdm

from .atari import create_atari_env
from .models import Agent

LEARNING_RATE = 1e-4
WORKERS = 4
JOB_BLOCK = 50
ACTOR_WEIGHT = 0.5
MAX_PLAY_STEPS = 20

train_progress_queue = mp.Queue(1000)


def play(env, agent, first_state, max_steps=20, render=False, action_code=(0, 2, 3)):
    done = False
    steps = 0
    state = first_state
    trajectory = {
        'states': [],
        'rewards': [],
        'actions_logprob': [],
    def __init__(self, loader):
        self.dataset = loader.dataset
        self.scale = loader.scale
        self.collate_fn = loader.collate_fn
        self.batch_sampler = loader.batch_sampler
        self.num_workers = loader.num_workers
        self.pin_memory = loader.pin_memory and torch.cuda.is_available()
        self.timeout = loader.timeout
        self.done_event = threading.Event()

        self.sample_iter = iter(self.batch_sampler)

        if self.num_workers > 0:
            self.worker_init_fn = loader.worker_init_fn
            self.index_queues = [
                multiprocessing.Queue() for _ in range(self.num_workers)
            ]
            self.worker_queue_idx = 0
            self.worker_result_queue = multiprocessing.Queue()
            self.batches_outstanding = 0
            self.worker_pids_set = False
            self.shutdown = False
            self.send_idx = 0
            self.rcvd_idx = 0
            self.reorder_dict = {}

            base_seed = torch.LongTensor(1).random_()[0]
            self.workers = [
                multiprocessing.Process(
                    target=_ms_loop,
                    args=(self.dataset, self.index_queues[i],
                          self.worker_result_queue, self.collate_fn,
                          self.scale, base_seed + i, self.worker_init_fn, i))
                for i in range(self.num_workers)
            ]

            if self.pin_memory or self.timeout > 0:
                self.data_queue = queue.Queue()
                if self.pin_memory:
                    maybe_device_id = torch.cuda.current_device()
                else:
                    # do not initialize cuda context if not necessary
                    maybe_device_id = None
                self.worker_manager_thread = threading.Thread(
                    target=_pin_memory_loop,
                    args=(self.worker_result_queue, self.data_queue,
                          maybe_device_id, self.done_event))
                self.worker_manager_thread.daemon = True
                self.worker_manager_thread.start()
            else:
                self.data_queue = self.worker_result_queue

            for w in self.workers:
                w.daemon = True  # ensure that the worker exits on process exit
                w.start()

            _update_worker_pids(id(self), tuple(w.pid for w in self.workers))
            _set_SIGCHLD_handler()
            self.worker_pids_set = True

            # prime the prefetch loop
            for _ in range(2 * self.num_workers):
                self._put_indices()
def main():
    parser = argparse.ArgumentParser(description='Train Hyperbolic Embeddings')
    parser.add_argument('-checkpoint', default='/tmp/hype_embeddings.pth',
                        help='Where to store the model checkpoint')
    parser.add_argument('-dset', type=str, required=True,
                        help='Dataset identifier')
    parser.add_argument('-dim', type=int, default=20,
                        help='Embedding dimension')
    parser.add_argument('-manifold', type=str, default='poincare',
                        choices=MANIFOLDS.keys(), help='Embedding manifold')
    parser.add_argument('-lr', type=float, default=1000,
                        help='Learning rate')
    parser.add_argument('-epochs', type=int, default=100,
                        help='Number of epochs')
    parser.add_argument('-batchsize', type=int, default=512,
                        help='Batchsize')
    parser.add_argument('-negs', type=int, default=50,
                        help='Number of negatives')
    parser.add_argument('-burnin', type=int, default=20,
                        help='Epochs of burn in')
    parser.add_argument('-dampening', type=float, default=0.75,
                        help='Sample dampening during burnin')
    parser.add_argument('-ndproc', type=int, default=4,
                        help='Number of data loading processes')
    parser.add_argument('-eval_each', type=int, default=1,
                        help='Run evaluation every n-th epoch')
    parser.add_argument('-fresh', action='store_true', default=False,
                        help='Override checkpoint')
    parser.add_argument('-debug', action='store_true', default=False,
                        help='Print debugging output')
    parser.add_argument('-gpu', default=0, type=int,
                        help='Which GPU to run on (-1 for no gpu)')
    parser.add_argument('-sym', action='store_true', default=False,
                        help='Symmetrize dataset')
    parser.add_argument('-maxnorm', '-no-maxnorm', default='500000',
                        action=Unsettable, type=int)
    parser.add_argument('-sparse', default=False, action='store_true',
                        help='Use sparse gradients for embedding table')
    parser.add_argument('-burnin_multiplier', default=0.01, type=float)
    parser.add_argument('-neg_multiplier', default=1.0, type=float)
    parser.add_argument('-quiet', action='store_true', default=False)
    parser.add_argument('-lr_type', choices=['scale', 'constant'], default='constant')
    parser.add_argument('-train_threads', type=int, default=1,
                        help='Number of threads to use in training')
    parser.add_argument('-logfolder', type=str, default='./log/',
                        help='Path of log folder with a back slash')
    opt = parser.parse_args()

    # set up debugging and logging
    log_file = opt.logfolder + "logging.txt"
    log_level = logging.DEBUG if opt.debug else logging.INFO
    log = logging.getLogger('lorentz')
    logging.basicConfig(level=log_level, format='%(message)s', filename=log_file)

    if opt.gpu >= 0 and opt.train_threads > 1:
        opt.gpu = -1
        log.warning('Specified hogwild training with GPU, defaulting to CPU...')

    # set default tensor type
    th.set_default_tensor_type('torch.DoubleTensor')
    # set device
    device = th.device(f'cuda:{opt.gpu}' if opt.gpu >= 0 else 'cpu')

    # select manifold to optimize on
    manifold = MANIFOLDS[opt.manifold](debug=opt.debug, max_norm=opt.maxnorm)
    opt.dim = manifold.dim(opt.dim)

    if 'csv' in opt.dset:
        log.info('Using edge list dataloader')
        idx, objects, weights = load_edge_list(opt.dset, opt.sym)
        model, data, model_name, conf = initialize(manifold, opt, idx, objects,
                                                   weights, sparse=opt.sparse)
    else:
        log.info('Using adjacency matrix dataloader')
        dset = load_adjacency_matrix(opt.dset, 'hdf5')
        log.info('Setting up dataset...')
        data = AdjacencyDataset(dset, opt.negs, opt.batchsize, opt.ndproc,
                                opt.burnin > 0, sample_dampening=opt.dampening)
        model = Embedding(data.N, opt.dim, manifold, sparse=opt.sparse)
        objects = dset['objects']

    # set burnin parameters
    data.neg_multiplier = opt.neg_multiplier
    train._lr_multiplier = opt.burnin_multiplier

    # Build config string for log
    log.info(f'json_conf: {json.dumps(vars(opt))}')

    if opt.lr_type == 'scale':
        opt.lr = opt.lr * opt.batchsize

    # setup optimizer
    optimizer = RiemannianSGD(model.optim_params(manifold), lr=opt.lr)

    # setup checkpoint
    checkpoint = LocalCheckpoint(opt.checkpoint,
                                 include_in_all={
                                     'conf': vars(opt),
                                     'objects': objects
                                 },
                                 start_fresh=opt.fresh)

    # get state from checkpoint
    state = checkpoint.initialize({'epoch': 0, 'model': model.state_dict()})
    model.load_state_dict(state['model'])
    opt.epoch_start = state['epoch']

    adj = {}
    for inputs, _ in data:
        for row in inputs:
            x = row[0].item()
            y = row[1].item()
            if x in adj:
                adj[x].add(y)
            else:
                adj[x] = {y}

    controlQ, logQ = mp.Queue(), mp.Queue()
    control_thread = mp.Process(target=async_eval,
                                args=(adj, controlQ, logQ, opt))
    control_thread.start()

    # control closure
    def control(model, epoch, elapsed, loss):
        """Control thread to evaluate embedding"""
        lt = model.w_avg if hasattr(model, 'w_avg') else model.lt.weight.data
        manifold.normalize(lt)

        checkpoint.path = f'{opt.checkpoint}.{epoch}'
        checkpoint.save({
            'model': model.state_dict(),
            'embeddings': lt,
            'epoch': epoch,
            'manifold': opt.manifold,
        })

        controlQ.put((epoch, elapsed, loss, checkpoint.path))

        while not logQ.empty():
            lmsg, pth = logQ.get()
            shutil.move(pth, opt.checkpoint)
            log.info(f'json_stats: {json.dumps(lmsg)}')

    control.checkpoint = True

    model = model.to(device)
    if hasattr(model, 'w_avg'):
        model.w_avg = model.w_avg.to(device)

    if opt.train_threads > 1:
        threads = []
        model = model.share_memory()
        args = (device, model, data, optimizer, opt, log)
        kwargs = {'ctrl': control, 'progress': not opt.quiet}
        for i in range(opt.train_threads):
            kwargs['rank'] = i
            threads.append(mp.Process(target=train.train, args=args, kwargs=kwargs))
            threads[-1].start()
        [t.join() for t in threads]
    else:
        train.train(device, model, data, optimizer, opt, log,
                    ctrl=control, progress=not opt.quiet)

    controlQ.put(None)
    control_thread.join()

    while not logQ.empty():
        lmsg, pth = logQ.get()
        shutil.move(pth, opt.checkpoint)
        log.info(f'json_stats: {json.dumps(lmsg)}')
obs = share_memory(np.zeros(dimensions + env.observation_space.shape))
actions = share_memory(np.zeros(dimensions + env.action_space.shape))
logprobs = share_memory(np.zeros(dimensions))
rewards = share_memory(np.zeros(dimensions))
dones = share_memory(np.zeros(dimensions))
values = share_memory(np.zeros(dimensions))
traj_availables = share_memory(np.ones(dimensions))

actor_processes = []
data_processor_processes = []
ctx = mp.get_context("forkserver")
stats_queue = MpQueue()
# stats_queue = mp.Queue(1000)
rollouts_queue = mp.Queue(1000)
data_process_queue = mp.Queue(1000)
data_process_back_queues = []

for i in range(args.num_rollout_workers):
    actor = mp.Process(
        target=act,
        args=[args, experiment_name, i, lock, stats_queue, device,
              obs, actions, logprobs, rewards, dones, values,
              traj_availables],
    )
    actor.start()
    actor_processes.append(actor)

# learner = ctx.Process(
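
# NOTE (added sketch): one plausible implementation of the `share_memory`
# helper used above (the project's real helper may differ). Wrapping the
# buffer in a torch tensor and moving its storage into shared memory makes
# writes visible to every process the tensor is handed to.
import numpy as np
import torch

def share_memory(arr):
    t = torch.from_numpy(np.asarray(arr, dtype=np.float64))
    t.share_memory_()
    return t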
    def train(self):
        print('A3C stock trading system v0.0.0_1 (Pong)')
        mp.set_start_method('spawn')
        os.environ['OMP_NUM_THREADS'] = "1"
        '''
        parser = argparse.ArgumentParser()
        parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda")
        parser.add_argument("-n", "--name", required=True, help="Name of the run")
        args = parser.parse_args()
        '''
        device = 'cuda:0'
        run_name = 'a3c'

        env, env_val, env_tst = A3cApp.make_env()
        # env = A3cApp.make_env()
        print('shape: {0}; n: {1};'.format(env.observation_space.shape,
                                           env.action_space.n))
        net = A2cConv1dModel((1, env.observation_space.shape[0]),
                             env.action_space.n)  # .to(device)
        net.share_memory()

        optimizer = optim.Adam(net.parameters(),
                               lr=AppConfig.a3c_config['LEARNING_RATE'], eps=1e-3)

        train_queue = mp.Queue(maxsize=AppConfig.a3c_config['PROCESSES_COUNT'])
        data_proc_list = []
        for proc_idx in range(AppConfig.a3c_config['PROCESSES_COUNT']):
            proc_name = f"-a3c-grad_pong_{run_name}#{proc_idx}"
            p_args = (proc_name, net, device, train_queue)
            data_proc = mp.Process(target=A3cApp.grads_func, args=p_args)
            data_proc.start()
            data_proc_list.append(data_proc)

        batch = []
        step_idx = 0
        grad_buffer = None

        try:
            while True:
                train_entry = train_queue.get()
                if train_entry is None:
                    break

                step_idx += 1

                if grad_buffer is None:
                    grad_buffer = train_entry
                else:
                    # accumulate gradients reported by the workers
                    for tgt_grad, grad in zip(grad_buffer, train_entry):
                        tgt_grad += grad

                if step_idx % AppConfig.a3c_config['TRAIN_BATCH'] == 0:
                    net.zero_grad()
                    for param, grad in zip(net.parameters(), grad_buffer):
                        if param.grad is not None:
                            param.grad = torch.FloatTensor(grad).to(device)
                    nn_utils.clip_grad_norm_(net.parameters(),
                                             AppConfig.a3c_config['CLIP_GRAD'])
                    optimizer.step()
                    grad_buffer = None
        finally:
            for p in data_proc_list:
                p.terminate()
                p.join()
parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda") parser.add_argument("-n", "--name", required=True, help="Name of the run") args = parser.parse_args() device = "cuda" if args.cuda else "cpu" env = make_env() net = common.AtariA2C(env.observation_space.shape, env.action_space.n).to(device) net.share_memory() optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3) train_queue = mp.Queue(maxsize=PROCESSES_COUNT) data_proc_list = [] for proc_idx in range(PROCESSES_COUNT): proc_name = f"-a3c-grad_pong_{args.name}#{proc_idx}" p_args = (proc_name, net, device, train_queue) data_proc = mp.Process(target=grads_func, args=p_args) data_proc.start() data_proc_list.append(data_proc) batch = [] step_idx = 0 grad_buffer = None try: while True: train_entry = train_queue.get()
    td3_trainer = TD3_Trainer(replay_buffer,
                              hidden_dim=hidden_dim,
                              policy_target_update_interval=policy_target_update_interval,
                              action_range=action_range)

    if args.train:
        td3_trainer.load_model(model_path)

        # Share network parameters and optimizer state across processes.
        td3_trainer.q_net1.share_memory()
        td3_trainer.q_net2.share_memory()
        td3_trainer.target_q_net1.share_memory()
        td3_trainer.target_q_net2.share_memory()
        td3_trainer.policy_net.share_memory()
        td3_trainer.target_policy_net.share_memory()
        ShareParameters(td3_trainer.q_optimizer1)
        ShareParameters(td3_trainer.q_optimizer2)
        ShareParameters(td3_trainer.policy_optimizer)

        # used to get rewards from all processes and plot the curve
        rewards_queue = mp.Queue()

        num_workers = 2  # or: mp.cpu_count()
        processes = []
        rewards = []

        for i in range(num_workers):
            # the args contain shared and not shared objects
            process = Process(target=worker,
                              args=(i, td3_trainer, rewards_queue, replay_buffer,
                                    max_episodes, max_steps, batch_size,
                                    explore_steps, update_itr, explore_noise_scale,
                                    eval_noise_scale, reward_scale, DETERMINISTIC,
                                    hidden_dim, model_path))
            process.daemon = True  # all processes close when the main process stops
            processes.append(process)
    def train(
            self, args: Namespace, env_builder: Callable[[], Env], algo: RLAlgo
        ) -> None:
        """
        Trains the algorithm on the environment given using the argument
        namespace as parameters.

        "args" must have the following attributes:
        {
            experiment_path (str): The path to save experiment results and models.
            render (bool): Render the environment.
            steps_per_episode (Optional[int]): The number of steps in each episode.
            silent (bool): Will run without standard output from agents.
            action_mask (Optional[Tuple[bool, ...]]): The action mask to mask or
                unmask.
            masked (Optional[bool]): If an action mask is given, should be True
                if the returned agent actions are already masked.
            default_action (Optional[Tuple[float, ...]]): If an action mask is
                given and going from masked -> unmasked, this should be the
                default values for the actions.
            decay (float): The gamma decay for the target Q-values.
            n_steps (int): The number of decay steps.
            num_agents (int): The number of agents to run concurrently, 0 is
                single process.
            model_sync_interval (int): The number of training steps between
                agent model syncs; if 0, all processes will share the same model.
            num_prefetch_batches (int): The number of batches to prefetch to the
                learner in distributed learning.
            local_batch_size (int): The number of experiences the agent sends at
                once in distributed learning.
            vectorized (bool): If the environment is vectorized.
            recurrent (bool): Make the network recurrent (using LSTM).
            play (bool): Runs the environment using the model instead of
                training.
            exploration (str, ["rnd", "munchausen"]): The type of exploration to
                use.
            episodes (int): The number of episodes to play for if playing.
            er_capacity (int): The capacity of the experience replay buffer.
            batch_size (int): The batch size of the training set.
            training_steps (int): The number of training steps to train for.
            start_size (int): The size of the replay buffer before training.
            er_alpha (float): The alpha value for PER.
            er_beta (float): The beta value for PER.
            er_beta_increment (float): The increment of the beta value on each
                sample for PER.
            er_epsilon (float): The epsilon value for PER.
            burn_in_length (int): If recurrent, the number of burn-in samples
                for R2D2.
            sequence_length (int): If recurrent, the length of the sequence to
                train on.
            max_factor (int): If recurrent, factor of max priority to mean
                priority for R2D2.
        }

        Args:
            args: The namespace of arguments for training.
            env_builder: The nullary function to create the environment.
            algo: The algorithm to train.
""" logs_path = None save_path = None if args.experiment_path is not None: logs_path = Path(args.experiment_path, "logs") logs_path.mkdir(parents=True, exist_ok=True) logs_path = str(logs_path) save_path = Path(args.experiment_path, "models") save_path.mkdir(parents=True, exist_ok=True) save_path = str(save_path) # Create agent class agent_builder = partial( OffPolicyAgent, algo=algo, render=args.render, silent=args.silent ) steps_per_episode = ( args.steps_per_episode if "steps_per_episode" in args else None ) agent_builder = compose( agent_builder, partial(TimeLimitAgent, max_steps=steps_per_episode) ) if not args.play: # Experience replay # Won't increment in multiple processes to keep it consistent # across actors er_beta_increment = ( args.er_beta_increment if args.num_agents == 0 else 0 ) if args.recurrent: experience_replay_func = partial( TorchR2D2, alpha=args.er_alpha, beta=args.er_beta, beta_increment=er_beta_increment, epsilon=args.er_epsilon, max_factor=args.max_factor ) else: experience_replay_func = partial( TorchPER, alpha=args.er_alpha, beta=args.er_beta, beta_increment=er_beta_increment, epsilon=args.er_epsilon ) if args.num_agents > 0: recv_pipes = [] send_pipes = [] prestart_func = None if args.model_sync_interval == 0: self._start_training(algo, args) algo.share_memory() recv_pipes = [None] * args.num_agents else: prestart_func = partial( self._start_training, algo=algo, args=args ) # Force CPU for now to avoid re-instantiating cuda in # subprocesses algo.device = torch.device("cpu") algo = algo.to(algo.device) for i in range(args.num_agents): param_pipe = mp.Pipe(duplex=False) recv_pipes.append(param_pipe[0]) send_pipes.append(param_pipe[1]) # Just needed to get the error/priority calculations dummy_experience_replay = experience_replay_func(capacity=1) # Must come before the other wrapper since there are infinite # recursion errors # TODO come up with a better way to implement wrappers agent_builder = compose( agent_builder, partial_iterator( QueueAgent, agent_id=(iter(range(args.num_agents)), True), experience_replay=(dummy_experience_replay, False), param_pipe=(iter(recv_pipes), True) ) ) agent_builder = compose( agent_builder, partial(TorchRLAgent, batch_state=not args.vectorized) ) if "action_mask" in args and args.action_mask: # TODO: Will have to add an action mask wrapper later if args.masked: agent_builder = compose( agent_builder, partial( UnmaskedActionAgent, action_mask=args.action_mask, default_action=args.default_action ) ) agent_builder = compose(agent_builder, TorchOffPolicyAgent) if args.recurrent: agent_builder = compose( agent_builder, SequenceInputAgent, TorchRecurrentAgent ) if args.play: algo = algo.to(args.device) algo.eval() agent_logger = ( None if logs_path is None else TensorboardLogger(logs_path + "/play-agent") ) agent = agent_builder(env=env_builder(), logger=agent_logger) agent.play(args.episodes) else: if args.exploration == "rnd": agent_builder = compose(agent_builder, IntrinsicRewardAgent) elif args.exploration == "munchausen": agent_builder = compose( agent_builder, partial(MunchausenAgent, alpha=0.9) ) algo.train() if args.recurrent: agent_builder = compose( agent_builder, partial( ExperienceSequenceAgent, sequence_length=( args.burn_in_length + args.sequence_length ), overlap=args.burn_in_length ) ) experience_replay = experience_replay_func( capacity=args.er_capacity ) base_agent_logs_path = None if logs_path is not None: base_agent_logs_path = logs_path + "/train-agent" # Single process if args.num_agents == 0: 
                self._start_training(algo, args)

                agent_logger = None
                if base_agent_logs_path is not None:
                    agent_logger = TensorboardLogger(base_agent_logs_path)

                agent = agent_builder(env=env_builder(), logger=agent_logger)
                agent.train(
                    args.episodes, 1, args.discount, args.n_steps,
                    experience_replay, args.batch_size, args.start_size,
                    save_path, args.save_interval
                )
            # Multiple processes
            else:
                done_event = mp.Event()

                # Number of agents + worker + learner
                queue_barrier = mp.Barrier(args.num_agents + 2)

                agent_queue = mp.Queue(
                    maxsize=args.num_prefetch_batches * args.num_agents * 4
                )
                sample_queue = mp.Queue(maxsize=args.num_prefetch_batches)
                priority_queue = mp.Queue(maxsize=args.num_prefetch_batches)

                learner_args = (dummy_experience_replay,)
                learner_train_args = (
                    algo, done_event, queue_barrier, args.training_steps,
                    sample_queue, priority_queue, send_pipes,
                    args.model_sync_interval, save_path, args.save_interval
                )

                worker = TorchApexWorker()
                worker_args = (
                    experience_replay, done_event, queue_barrier, agent_queue,
                    sample_queue, priority_queue, args.batch_size,
                    args.start_size
                )

                agent_builders = []
                agent_train_args = []
                agent_train_kwargs = []

                for i in range(args.num_agents):
                    agent_logger = None
                    if base_agent_logs_path is not None:
                        agent_logs_path = (
                            base_agent_logs_path + "-" + str(i + 1)
                        )
                        agent_logger = TensorboardLogger(agent_logs_path)

                    agent_builders.append(
                        partial(agent_builder, logger=agent_logger)
                    )

                    agent_train_args.append((
                        1, args.local_batch_size, args.discount, args.n_steps,
                        agent_queue, queue_barrier
                    ))
                    agent_train_kwargs.append({
                        "exit_condition": done_event.is_set
                    })

                runner = ApexRunner(done_event)
                runner.start(
                    learner_args, learner_train_args, worker, worker_args,
                    env_builder, agent_builders, agent_train_args,
                    agent_train_kwargs, prestart_func
                )
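
# NOTE (added sketch): the three queues above wire up a standard Ape-X data
# path (actors -> replay worker -> learner, with priorities flowing back).
# A hedged sketch of the replay-side loop matching worker_args; the
# experience-replay method names are assumptions.
def replay_worker_loop(experience_replay, done_event, queue_barrier,
                       agent_queue, sample_queue, priority_queue,
                       batch_size, start_size):
    while not done_event.is_set():
        # Ingest experiences pushed by the actor processes.
        while not agent_queue.empty():
            experience_replay.add(*agent_queue.get())
        # Once warmed up, keep the learner fed with prioritized batches.
        if len(experience_replay) >= start_size and not sample_queue.full():
            sample_queue.put(experience_replay.sample(batch_size))
        # Fold the learner's updated priorities back into the buffer.
        while not priority_queue.empty():
            ids, priorities = priority_queue.get()
            experience_replay.update_priorities(ids, priorities)
    queue_barrier.wait()  # rendezvous with agents and learner before shutdown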
    def __init__(self, loader):
        super(_MultiProcessingDataLoaderIter, self).__init__(loader)

        assert self.num_workers > 0

        self.worker_init_fn = loader.worker_init_fn
        self.worker_queue_idx_cycle = itertools.cycle(range(self.num_workers))
        self.worker_result_queue = multiprocessing.Queue()
        self.worker_pids_set = False
        self.shutdown = False
        self.send_idx = 0  # idx of the next task to be sent to workers
        self.rcvd_idx = 0  # idx of the next task to be returned in __next__
        # information about data not yet yielded, i.e., tasks w/ indices in
        # range [rcvd_idx, send_idx).
        # map: task idx => - (worker_id,)       if data isn't fetched (outstanding)
        #                  \ (worker_id, data)  if data is already fetched (out-of-order)
        self.task_info = {}
        # always equal to count(v for v in task_info.values() if len(v) == 1)
        self.tasks_outstanding = 0
        self.workers_done_event = multiprocessing.Event()

        self.index_queues = []
        self.workers = []
        # A list of booleans representing whether each worker still has work to
        # do, i.e., not having exhausted its iterable dataset object. It always
        # contains all `True`s if not using an iterable-style dataset
        # (i.e., if kind != Iterable).
        self.workers_status = []
        for i in range(self.num_workers):
            index_queue = multiprocessing.Queue()
            # index_queue.cancel_join_thread()
            w = multiprocessing.Process(
                target=_utils.worker._worker_loop,
                args=(self.dataset_kind, self.dataset, index_queue,
                      self.worker_result_queue, self.workers_done_event,
                      self.auto_collation, self.collate_fn, self.drop_last,
                      self.base_seed + i, self.worker_init_fn, i,
                      self.num_workers))
            w.daemon = True
            # NB: Process.start() actually take some time as it needs to
            #     start a process and pass the arguments over via a pipe.
            #     Therefore, we only add a worker to self.workers list after
            #     it started, so that we do not call .join() if program dies
            #     before it starts, and __del__ tries to join but will get:
            #     AssertionError: can only join a started process.
            w.start()
            self.index_queues.append(index_queue)
            self.workers.append(w)
            self.workers_status.append(True)

        if self.pin_memory:
            self.pin_memory_thread_done_event = threading.Event()
            self.data_queue = queue.Queue()
            pin_memory_thread = threading.Thread(
                target=_utils.pin_memory._pin_memory_loop,
                args=(self.worker_result_queue, self.data_queue,
                      torch.cuda.current_device(),
                      self.pin_memory_thread_done_event))
            pin_memory_thread.daemon = True
            pin_memory_thread.start()
            # Similar to workers (see comment above), we only register
            # pin_memory_thread once it is started.
            self.pin_memory_thread = pin_memory_thread
        else:
            self.data_queue = self.worker_result_queue

        _utils.signal_handling._set_worker_pids(
            id(self), tuple(w.pid for w in self.workers))
        _utils.signal_handling._set_SIGCHLD_handler()
        self.worker_pids_set = True

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._try_put_index()
                self.global_episode.value += 1
                self.lock.release()
                state_collections = torch.FloatTensor([[]])
                action_collections = torch.FloatTensor([])
                reward_collections = torch.FloatTensor([])
                time.sleep(0.5)
                break


if __name__ == '__main__':
    env = gym.make('CartPole-v1')
    LEARN = True
    NUMBER = int(mp.cpu_count() / 2)
    global_episode = mp.Value('i', 0)
    MAX_EPISODE = 30000
    queue = mp.Queue()
    lock = mp.Lock()

    policy = PolicyNet(n_state=2 * env.observation_space.shape[0],
                       n_action=env.action_space.n)
    policy.share_memory()
    agent = Agent(policy=policy,
                  n_state=2 * env.observation_space.shape[0],
                  n_action=env.action_space.n,
                  learn=LEARN,
                  queue=queue,
                  global_episode=global_episode)
    workers = [
        Worker(policy=policy,
               worker_id=i,
def ppo_learn(replay_buffer, replay_buffer_reward, env, model, cov_matrix, model_optim):
    # Some problem with multiprocessing; this is the solution:
    # https://github.com/pytorch/pytorch/issues/973#issuecomment-346405667
    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))

    np.random.seed(0)
    current_best_reward = float('-inf')
    global_iteration_counter = 0
    optimization_history_list = []

    while True:
        new_samples = []

        # multiprocessing
        q = mp.Queue(maxsize=C.max_worker)
        sample_counter = 0
        for iteration_index in range(0, int(C.max_new_episode / C.max_worker) + 1):
            p_list = []
            for worker in range(0, C.max_worker):
                try:
                    if sample_counter < C.max_new_episode:
                        p = mp.Process(target=roll_out_once.roll_out_once,
                                       args=(q, env, model, cov_matrix))
                        p.start()
                        p_list.append(p)
                        sample_counter += 1
                    else:
                        raise Exception("Don't need to start new process")
                except:
                    pass
            for j in range(len(p_list)):
                res = q.get()
                new_samples.append(res[0])

        # observation_list, action_list, log_prob_action_list, reward_list = \
        #     roll_out_once.roll_out_once(env, model, cov_matrix)

        new_sample_reward = []
        for new_sample in new_samples:
            # drop old simulation experience
            if len(replay_buffer) > C.replay_buffer_size:
                drop_index = np.argmin(replay_buffer_reward)
                replay_buffer.pop(drop_index)
                replay_buffer_reward.pop(drop_index)
            # add the new simulation result to the replay buffer
            total_reward = np.sum(new_sample['reward_list'])
            replay_buffer_reward.append(total_reward)
            replay_buffer.append(new_sample)
            new_sample_reward.append(new_sample['reward_list'])

        global_iteration_counter += 1
        print('this is global iteration ', global_iteration_counter)
        print('the current reward is', np.mean(new_sample_reward))

        # record the optimization process
        optimization_history_list.append(np.mean(new_sample_reward))
        optimization_history = {}
        optimization_history['objective_history'] = optimization_history_list
        cwd = os.getcwd()
        # cwd = os.path.join(cwd, 'data_folder')
        parameter_file = 'optimization_history.json'
        cwd = os.path.join(cwd, parameter_file)
        with open(cwd, 'w') as statusFile:
            statusFile.write(jsonpickle.encode(optimization_history))

        if np.mean(new_sample_reward) > current_best_reward:
            current_best_reward = np.mean(new_sample_reward)
            # save the neural network model
            cwd = os.getcwd()
            parameter_file = 'pendulum_nn_trained_model.pt'
            cwd = os.path.join(cwd, parameter_file)
            torch.save(model.state_dict(), cwd)

        # we can update the model more than once because we are using off-line data
        for update_iteration in range(0, 10):
            # sample experience from the replay buffer for training
            # new_replay_buffer_reward = []
            # for entry in replay_buffer_reward:
            #     new_replay_buffer_reward.append(np.log(entry))
            # sample_probability = np.exp(new_replay_buffer_reward) / np.sum(np.exp(new_replay_buffer_reward))
            # (softmax over the total_reward list)
            sampled_off_line_data = []
            for sample_counter in range(0, C.training_batch_size):
                # sampled_index = np.random.choice(np.arange(0, len(replay_buffer)), p=sample_probability.tolist())
                sampled_index = np.random.randint(0, len(replay_buffer) - 1)
                sampled_off_line_data.append(replay_buffer[sampled_index])

            # compute the loss and update the model
            # total_loss = torch.tensor([0.0], requires_grad=True)
            total_loss = 0
            model.zero_grad()
            baseline_reward = 0
            # for sample_index in range(0, len(sampled_off_line_data)):
            #     off_line_data = sampled_off_line_data[sample_index]
            #     baseline_reward += np.sum(off_line_data['reward_list'])
            # baseline_reward = baseline_reward / len(sampled_off_line_data)

            for sample_index in range(0, len(sampled_off_line_data)):
                off_line_data = sampled_off_line_data[sample_index]
                actor_log_prob_mean = model(off_line_data['observation_list'])
                dist = MultivariateNormal(actor_log_prob_mean, cov_matrix)
                actor_log_prob = dist.log_prob(off_line_data['action_list'])

                # calculate the importance ratio for adjusting off-line data
                ratios = torch.exp(actor_log_prob - off_line_data['log_prob_action_list'])
                ratio = torch.prod(ratios)

                # vanilla policy gradient loss
                # vanila_pg_loss = off_line_data['log_prob_action_list'] * np.sum(off_line_data['reward_list'])
                vanila_pg_loss = torch.sum(actor_log_prob) * (
                    np.sum(off_line_data['reward_list']) - baseline_reward)

                # compute the PPO clipped surrogate loss
                temp_loss1 = ratio * vanila_pg_loss
                temp_loss2 = torch.clamp(ratio, 1 - C.ppo_clip, 1 + C.ppo_clip) * vanila_pg_loss
                total_loss = total_loss - torch.min(temp_loss1, temp_loss2)

            total_loss = total_loss / len(sampled_off_line_data)

            # update the model
            model.zero_grad()
            total_loss.backward()
            model_optim.step()
    def _fire_process(self, dataloader, prefetch):
        self.queue = mp.Queue(prefetch)
        self.process = mp.Process(target=_prefetch_generator,
                                  args=(dataloader, self.queue, self._batchify))
        self.process.start()
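
# NOTE (added sketch): a plausible shape for the `_prefetch_generator` target
# (the actual function lives elsewhere in this codebase). It drains the
# dataloader in the child process, blocking whenever the bounded queue already
# holds `prefetch` batches, and signals exhaustion with a None sentinel.
def _prefetch_generator(dataloader, queue, batchify):
    for batch in dataloader:
        queue.put(batchify(batch))  # blocks when the consumer falls behind
    queue.put(None)  # end-of-stream sentinel for the consumer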
    def __init__(self, loader):
        self.dataset = loader.dataset
        self.collate_fn = loader.collate_fn
        self.batch_sampler = loader.batch_sampler
        self.num_workers = loader.num_workers
        self.pin_memory = loader.pin_memory and torch.cuda.is_available()
        self.timeout = loader.timeout

        self.sample_iter = iter(self.batch_sampler)

        base_seed = torch.LongTensor(1).random_().item()

        if self.num_workers > 0:
            self.worker_init_fn = loader.worker_init_fn
            self.worker_queue_idx = 0
            self.worker_result_queue = multiprocessing.Queue()
            self.batches_outstanding = 0
            self.worker_pids_set = False
            self.shutdown = False
            self.send_idx = 0
            self.rcvd_idx = 0
            self.reorder_dict = {}
            self.done_event = multiprocessing.Event()

            self.index_queues = []
            self.workers = []
            for i in range(self.num_workers):
                index_queue = multiprocessing.Queue()
                w = multiprocessing.Process(
                    target=_worker_loop,
                    args=(self.dataset, index_queue, self.worker_result_queue,
                          self.done_event, self.collate_fn, base_seed + i,
                          self.worker_init_fn, i))
                w.daemon = True  # ensure that the worker exits on process exit
                # Process.start() actually take some time as it needs to start a
                # process and pass the arguments over via a pipe. Therefore, we
                # only add a worker to self.workers list after it started, so
                # that we do not call .join() if program dies before it starts,
                # and __del__ tries to join it but will get:
                #     AssertionError: can only join a started process.
                w.start()
                self.index_queues.append(index_queue)
                self.workers.append(w)

            if self.pin_memory:
                self.data_queue = queue.Queue()
                self.pin_memory_thread = threading.Thread(
                    target=_pin_memory_loop,
                    args=(self.worker_result_queue, self.data_queue,
                          self.done_event, self.pin_memory,
                          torch.cuda.current_device()))
                self.pin_memory_thread.daemon = True
                self.pin_memory_thread.start()
            else:
                self.data_queue = self.worker_result_queue

            _update_worker_pids(id(self), tuple(w.pid for w in self.workers))
            _set_SIGCHLD_handler()
            self.worker_pids_set = True

            # prime the prefetch loop
            for _ in range(2 * self.num_workers):
                self._put_indices()
    def _test_gpt2_config_pp(self, tmpdir, mp_size, pp_size, mp_resize, pp_resize):
        @distributed_test(world_size=pp_size * mp_size)
        def _run_baseline(inputs, tag, output, quit_event):
            reset_random()
            args_defaults = {
                'num_layers': 8,
                'hidden_size': 128,
                'num_attention_heads': 8,
                'max_position_embeddings': 128,
            }

            topo = self.get_topology(mp_size, pp_size, mp_size * pp_size)
            gpt2_pipe_model = GPT2ModelPipe(num_layers=8,
                                            num_stages=pp_size,
                                            mp_size=mp_size,
                                            args_others=args_defaults,
                                            topo=topo)
            model = self.get_deepspeed_model(gpt2_pipe_model, tmpdir)

            with torch.no_grad():
                inputs = [x.cuda() for x in inputs]
                if model.is_first_stage() or model.is_last_stage():
                    loader = RepeatingLoader([(inputs[0], 0)])
                    data_iter = iter(loader)
                else:
                    data_iter = None

                baseline = model.eval_batch(data_iter=data_iter,
                                            compute_loss=False,
                                            reduce_output=None)

                if baseline is not None:
                    # baseline should be [[hidden, True]]
                    assert len(baseline) == 1
                    assert len(baseline[0]) == 1
                    assert torch.is_tensor(baseline[0][0])
                    output.put(baseline[0][0].cpu())

                state_dict = {}
                state_dict['checkpoint_version'] = get_megatron_version()
                model.save_checkpoint(tmpdir, tag=tag, client_state=state_dict)
                quit_event.wait()

        @distributed_test(world_size=mp_resize * pp_resize)
        def _run_resize(inputs, tag, output, quit_event):
            reset_random()
            args_defaults = {
                'num_layers': 8,
                'hidden_size': 128,
                'num_attention_heads': 8,
                'max_position_embeddings': 128,
            }

            topo = self.get_topology(mp_resize, pp_resize, mp_resize * pp_resize)
            gpt2_pipe_model = GPT2ModelPipe(num_layers=8,
                                            num_stages=pp_resize,
                                            mp_size=mp_resize,
                                            args_others=args_defaults,
                                            topo=topo)
            model = self.get_deepspeed_model(gpt2_pipe_model, tmpdir)

            with torch.no_grad():
                model.load_checkpoint(tmpdir, tag=tag,
                                      load_optimizer_states=False,
                                      load_lr_scheduler_states=False)
                inputs = [x.cuda() for x in inputs]
                if model.is_first_stage() or model.is_last_stage():
                    loader = RepeatingLoader([(inputs[0], 0)])
                    data_iter = iter(loader)
                else:
                    data_iter = None

                test = model.eval_batch(data_iter=data_iter,
                                        compute_loss=False,
                                        reduce_output=None)

                if test is not None:
                    # test should be [[hidden, True]]
                    assert len(test) == 1
                    assert len(test[0]) == 1
                    assert torch.is_tensor(test[0][0])
                    output.put(test[0][0].cpu())

                quit_event.wait()

        def _verify(b_queue, t_queue, baseline_event, test_event):
            baseline = b_queue.get()
            baseline_event.set()
            test = t_queue.get()
            test_event.set()
            assert torch.allclose(baseline, test, atol=1e-03), \
                f"Baseline output {baseline} is not equal to save-then-load output {test}"

        tag = f'mp_{mp_size}to{mp_resize}_pp_{pp_size}to{pp_resize}'
        baseline = mp.Queue()
        test = mp.Queue()
        baseline_event = mp.Event()
        test_event = mp.Event()

        verify_process = mp.Process(target=_verify,
                                    args=(baseline, test, baseline_event, test_event))
        verify_process.start()

        inputs = self.get_inputs()
        _run_baseline(inputs, tag, baseline, baseline_event)
        _run_resize(inputs, tag, test, test_event)

        verify_process.join()