def HandleWorkers(server: socket.socket, replay_memory: ReplayMemory, mem_lock: Lock,
                  param_queue: Queue, shutdown: Value):
    print("Listening for new workers...")
    server.settimeout(1)  # timeout period of 1 second
    num_workers = 0
    workers: Dict[int, socket.socket] = dict()
    state_dict = None
    while shutdown.value <= 0:
        try:
            worker, _ = server.accept()
            print("Connected to new worker")
            worker_id = num_workers
            worker_proc = Process(target=ReceivePlayouts,
                                  args=(worker, worker_id, replay_memory, mem_lock),
                                  daemon=True)
            worker_proc.start()
            if state_dict is not None:
                # Send the new worker the most up-to-date params
                buffer = io.BytesIO()
                torch.save(state_dict, buffer)
                param_bytes = buffer.getvalue()
                communication.Send(worker, param_bytes)
            workers[worker_id] = worker
            num_workers += 1
        except socket.timeout:
            pass
        if not param_queue.empty():
            # Drain the queue, keeping only the most recent params,
            # then send them to all the workers
            state_dict = None
            while not param_queue.empty():
                state_dict = param_queue.get()
            assert state_dict is not None
            buffer = io.BytesIO()
            torch.save(state_dict, buffer)
            param_bytes = buffer.getvalue()
            print("Sending new params to workers")
            # iterate over a copy of the keys: entries may be removed below
            for worker_id in list(workers.keys()):
                worker: socket.socket = workers[worker_id]
                try:
                    communication.Send(worker, param_bytes)
                except OSError:
                    # Something went wrong with this connection, so remove this worker
                    print(f"Error with worker {worker_id}, ending connection")
                    workers.pop(worker_id)
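# A minimal sketch of the state_dict-to-bytes round-trip used above for
# broadcasting parameters. Only the serialization is shown; the socket layer
# (communication.Send) is out of scope and the Linear model is a stand-in.
import io
import torch
import torch.nn as nn

model = nn.Linear(4, 2)

# sender side: serialize parameters into an in-memory buffer
buf = io.BytesIO()
torch.save(model.state_dict(), buf)
param_bytes = buf.getvalue()  # raw bytes, ready to send over a socket

# receiver side: rebuild the state_dict from the received bytes
restored = torch.load(io.BytesIO(param_bytes))
model.load_state_dict(restored)
print('round-trip ok, %d bytes' % len(param_bytes))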
def self_multiplay(policy):
    q = Queue()
    finq = []
    procs = []
    policy.train(False)
    for i in range(MaxProcessNum):
        fin = Queue()
        t = Process(target=PlayProcess, args=(i, q, fin, policy))
        t.start()
        procs.append(t)
        finq.append(fin)
    for i in range(MaxProcessNum):
        id = finq[i].get()
        print("finish process(%d)" % id)
        sys.stdout.flush()
    try:
        while not q.empty():
            data_buffer.append(q.get(timeout=1))
    except queue.Empty:  # q.get raises queue.Empty on timeout, not TimeoutError
        pass
    print('finish Queue get')
    sys.stdout.flush()
    for i in range(len(procs)):
        p = procs[i]
        p.join(timeout=10)
        if p.is_alive():
            print('forcing process(%d) to terminate' % i)
            sys.stdout.flush()
            p.terminate()
    print('finish join')
    sys.stdout.flush()
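# A minimal, race-free variant of the drain loop above. Checking q.empty()
# before q.get() races, because empty() is only a snapshot of cross-process
# state; the usual pattern relies on the queue.Empty exception from a timed
# get() instead.
import queue
from multiprocessing import Queue

def drain(q: Queue, timeout: float = 1.0) -> list:
    """Collect items until no new item arrives within `timeout` seconds."""
    items = []
    while True:
        try:
            items.append(q.get(timeout=timeout))
        except queue.Empty:
            break  # nothing arrived in time; treat the queue as drained
    return items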
def _run_game(process_id: int, game_factory: GameExecutorFactory, network: nn.Module,
              device: torch.device, request_queue: Queue, experience_queue: Queue,
              batch_size: int, transfer_blocks: int, transfer_to_device: bool) -> None:
    exploration_rate = 1.
    game = game_factory.create()
    print('* worker %d started' % process_id)
    while True:
        try:
            if not request_queue.empty():
                request: _RunGameRequest = request_queue.get(block=False)
                if request.do_terminate:
                    print('* game worker %d terminated' % process_id)
                    experience_queue.close()
                    request_queue.close()
                    return
                if request.set_exploration_rate is not None:
                    exploration_rate = request.set_exploration_rate

            block = []
            for _ in range(transfer_blocks):
                eps, exps = game.multi_step(network, device, exploration_rate, batch_size)
                if transfer_to_device:
                    exps = [e.to_device(device, non_blocking=False) for e in exps]
                block.append((eps, exps))
            experience_queue.put(block, block=True)
        except Exception as e:
            print('error in worker %d: ' % process_id, e)
def main():
    args = parse_args()
    categories = parse_categories(parse_data(args.data)['names'])
    cap = cv2.VideoCapture(0)
    frame_queue = Queue()
    preds_queue = Queue()
    cur_dets = None
    frame_lock = Lock()
    proc = Process(target=detect, args=(frame_queue, preds_queue, frame_lock, args))
    proc.start()
    try:
        while True:
            ret, frame = cap.read()
            frame_lock.acquire()
            # keep only the most recent frame in the queue
            while not frame_queue.empty():
                frame_queue.get()
            frame_queue.put(frame)
            frame_lock.release()
            if not preds_queue.empty():
                cur_dets = preds_queue.get()
            if cur_dets is not None and len(cur_dets) > 0:
                frame = draw_detections_opencv(frame, cur_dets[0], categories)
            cv2.imshow('frame', frame)
            cv2.waitKey(1)
    except KeyboardInterrupt:
        print('Interrupted')
        proc.join()
    cap.release()
    cv2.destroyAllWindows()
def dynamic_power(model, input_shape):
    q = Queue()
    power_return = Queue()
    interval_return = Queue()
    latency_return = Queue()
    input_tensor_queue = Queue()
    model_queue = Queue()

    input_tensor = torch.ones([*input_shape])
    input_tensor_queue.put(input_tensor)
    model.share_memory()
    model_queue.put(model)

    context = torch.multiprocessing.get_context('spawn')
    p_thread = context.Process(target=power_thread,
                               args=(power_return, interval_return, q))
    l_thread = context.Process(target=latency_thread,
                               args=(model_queue, input_tensor_queue, latency_return, q))
    l_thread.start()
    p_thread.start()

    power_l = list()     # GPU power list
    interval_l = list()  # power interval list
    latency_l = list()   # latency list

    l_thread.join()
    while True:
        if not power_return.empty():
            power_l.append(power_return.get())
        if not interval_return.empty():
            interval_l.append(interval_return.get())
        if not latency_return.empty():
            latency_l.append(latency_return.get())
        if power_return.empty() and interval_return.empty() and latency_return.empty():
            break

    power_return.close()
    interval_return.close()
    latency_return.close()
    q.close()
    del q, power_return, latency_return, interval_return
    return latency_l, power_l, interval_l
class IterableParquetDataset(IterableDataset):
    def __init__(self, path, process_func):
        super().__init__()
        dataset = ds.dataset(path)
        self.process_func = process_func
        self.batches = Queue()
        for batch in dataset.to_batches():
            self.batches.put(batch)

    def __iter__(self):
        while True:
            if self.batches.empty():
                self.batches.close()
                break
            batch = self.batches.get().to_pydict()
            batch.update(self.process_func(batch))
            yield batch
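# The dataset above is filled and consumed in the same process, so a
# multiprocessing.Queue (whose empty() is unreliable across processes and
# whose feeder thread can stall on large payloads) is not strictly needed. A
# minimal single-process sketch using collections.deque instead; to_pydict()
# assumes pyarrow RecordBatch items, as in the original.
import collections

class IterableBatchSource:
    def __init__(self, batches, process_func):
        self.process_func = process_func
        self.batches = collections.deque(batches)

    def __iter__(self):
        while self.batches:
            batch = self.batches.popleft().to_pydict()
            batch.update(self.process_func(batch))
            yield batch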
class WorkerManager:
    def __init__(self, n_workers, actor, args):
        self._now_episode = Value('i', 0)
        self.queue = Queue()
        self.collect_event = Event()
        self.worker = []
        for i in range(n_workers):
            self.worker.append(Worker(self.queue, self.collect_event, actor, args))
            time.sleep(1)
        self.process = [
            Process(target=self.worker[i].run, args=(self._now_episode,))
            for i in range(n_workers)
        ]
        for p in self.process:
            p.start()
        print(f'Start {n_workers} workers.')

    def collect(self):
        result = []
        self.collect_event.set()
        while self.collect_event.is_set():  # busy-wait for data collection to end
            pass
        for w in self.worker:
            w.event.wait()
        while not self.queue.empty():
            result.append(self.queue.get())
        for w in self.worker:
            w.event.clear()
        return result

    def now_episode(self):
        return self._now_episode.value
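# The busy-wait in collect() above burns a CPU core. A sketch of the same
# hand-off with a blocking wait instead, assuming a hypothetical `done_event`
# that the workers set once collection has finished (not part of the
# original class).
from multiprocessing import Event

def wait_for_collection(collect_event, done_event, timeout=60.0):
    collect_event.set()                  # ask the workers to collect
    finished = done_event.wait(timeout)  # block without spinning
    collect_event.clear()
    return finished                      # False if the wait timed out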
def crop_face(args):
    for k, v in default_args.items():
        setattr(args, k, v)
    assert osp.exists(args.data_dir), "The input dir does not exist"
    root_folder_name = args.data_dir.split('/')[-1]
    src_folder = args.data_dir
    dst_folder = args.data_dir.replace(root_folder_name, root_folder_name + '_OPPOFaces')
    lz.mkdir_p(dst_folder, delete=False)
    ds = TestData(src_folder)
    loader = torch.utils.data.DataLoader(ds,
                                         batch_size=args.batch_size,
                                         num_workers=args.num_workers,
                                         shuffle=False,
                                         pin_memory=True,
                                         drop_last=False)
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp,
                            map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12 (pose) + 40 (shape) + 10 (expression)
    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load dlib model for face detection and landmarks used for face cropping
    queue = Queue()
    lock = Lock()
    consumers = []
    for i in range(args.num_consumers):
        p = Process(target=consumer, args=(queue, lock))
        p.daemon = True
        consumers.append(p)
    for c in consumers:
        c.start()

    # 3. forward
    ttl_nimgs = 0
    ttl_imgs = []
    data_meter = lz.AverageMeter()
    model_meter = lz.AverageMeter()
    post_meter = lz.AverageMeter()
    lz.timer.since_last_check('start crop face')
    for ind, data in enumerate(loader):
        data_meter.update(lz.timer.since_last_check(verbose=False))
        if (data['finish'] == 1).all().item():
            logging.info('finish')
            break
        if ind % 10 == 0:
            logging.info(
                f'proc batch {ind}, data time: {data_meter.avg:.2f}, '
                f'model: {model_meter.avg:.2f}, post: {post_meter.avg:.2f}')
        mask = data['finish'] == 0
        input = data['img'][mask]
        input_np = input.numpy()
        roi_box = data['roi_box'][mask].numpy()
        imgfn = np.asarray(data['imgfn'])[mask.numpy().astype(bool)]
        dst_imgfn = [img_fp.replace(root_folder_name, root_folder_name + '_OPPOFaces')
                     for img_fp in imgfn]
        ttl_imgs.extend(dst_imgfn)
        ttl_nimgs += mask.sum().item()
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().astype(np.float32)
        model_meter.update(lz.timer.since_last_check(verbose=False))
        queue.put((imgfn, param, roi_box, dst_imgfn))
        # pts68 = [predict_68pts(param[i], roi_box[i]) for i in range(param.shape[0])]
        # pts68_proc = [predict_68pts(param[i], [0, 0, STD_SIZE, STD_SIZE]) for i in range(param.shape[0])]
        # for img_fp, pts68_, pts68_proc_, img_, dst in zip(imgfn, pts68, pts68_proc, input_np, dst_imgfn):
        #     ## this may need opt to async read write
        #     img_ori = cvb.read_img(img_fp)
        #     pts5 = to_landmark5(pts68_[:2, :].transpose())
        #     warped = preprocess(img_ori, landmark=pts5)
        #     # plt_imshow(warped, inp_mode='bgr'); plt.show()
        #     lz.mkdir_p(osp.dirname(dst), delete=False)
        #     cvb.write_img(warped, dst)
        #
        #     ## this may cause black margin
        #     # pts5 = to_landmark5(pts68_proc_[:2, :].transpose())
        #     # warped = preprocess(to_img(img_), landmark=pts5)
        #     # # plt_imshow(warped, inp_mode='bgr'); plt.show()
        #     # dst = img_fp.replace(root_folder_name, root_folder_name + '_OPPOFaces')
        #     # cvb.write_img(warped, dst)
        #     if args.dump_res:
        #         img_ori = cvb.read_img(img_fp)
        #         pts_res = [pts68_]
        #         dst = img_fp.replace(root_folder_name, root_folder_name + '_kpts.demo')
        #         lz.mkdir_p(osp.dirname(dst), delete=False)
        #         draw_landmarks(img_ori, pts_res, wfp=dst, show_flg=args.show_flg)
        post_meter.update(lz.timer.since_last_check(verbose=False))
    lz.msgpack_dump(ttl_imgs, dst_folder + '/' + 'all_imgs.pk')
    del model, input
    torch.cuda.empty_cache()
    while not queue.empty():
        time.sleep(1)
def train():
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    writer = SummaryWriter()
    ac = AC(latent_num, cnn_chanel_num, stat_dim)
    writer.add_graph(ac, (torch.zeros([1, 1, img_shape[0], img_shape[1]]),
                          torch.zeros([1, stat_dim])))
    optim = GlobalAdam([{'params': ac.encode_img.parameters(), 'lr': 2.5e-5},
                        {'params': ac.encode_stat.parameters(), 'lr': 2.5e-5},
                        {'params': ac.pi.parameters(), 'lr': 2.5e-5},
                        {'params': ac.actor.parameters(), 'lr': 2.5e-5},
                        {'params': ac.f.parameters()},
                        {'params': ac.V.parameters()}],
                       lr=5e-3, weight_decay=weight_decay)
    if os.path.exists('S3_state_dict.pt'):
        ac.load_state_dict(torch.load('S3_state_dict.pt'))
        optim.load_state_dict(torch.load('S3_Optim_state_dict.pt'))
    else:
        ac.load_state_dict(torch.load('../stage2/S2_state_dict.pt'), strict=False)
    result_queue = Queue()
    validate_queue = Queue()
    gradient_queue = Queue()
    loss_queue = Queue()
    ep_cnt = Value('i', 0)
    optimizer_lock = Lock()
    processes = []
    ac.share_memory()
    optimizer_worker = Process(target=update_shared_model,
                               args=(gradient_queue, optimizer_lock, optim, ac))
    optimizer_worker.start()
    for no in range(mp.cpu_count() - 3):
        worker = Worker(no, ac, ep_cnt, optimizer_lock, result_queue,
                        gradient_queue, loss_queue)
        worker.start()
        processes.append(worker)
    validater = Validate(ac, ep_cnt, optimizer_lock, validate_queue)
    validater.start()
    best_reward = 0
    while True:
        with ep_cnt.get_lock():
            if not result_queue.empty():
                ep_cnt.value += 1
                reward, money, win_rate = result_queue.get()
                objective_actor, loss_critic, loss_f = loss_queue.get()
                writer.add_scalar('Interaction/Reward', reward, ep_cnt.value)
                writer.add_scalar('Interaction/Money', money, ep_cnt.value)
                writer.add_scalar('Interaction/win_rate', win_rate, ep_cnt.value)
                writer.add_scalar('Update/objective_actor', objective_actor, ep_cnt.value)
                writer.add_scalar('Update/loss_critic', loss_critic, ep_cnt.value)
                writer.add_scalar('Update/loss_f', loss_f, ep_cnt.value)
                with optimizer_lock:
                    if reward > best_reward:
                        best_reward = reward
                        torch.save(ac.state_dict(), 'S3_BEST_state_dict.pt')
                    if ep_cnt.value % save_every == 0:
                        torch.save(ac.state_dict(), 'S3_state_dict.pt')
                        torch.save(optim.state_dict(), 'S3_Optim_state_dict.pt')
        if not validate_queue.empty():
            val_reward, val_money, val_win_rate = validate_queue.get()
            writer.add_scalar('Validation/reward', val_reward, ep_cnt.value)
            writer.add_scalar('Validation/money', val_money, ep_cnt.value)
            writer.add_scalar('Validation/win_rate', val_win_rate, ep_cnt.value)
    for worker in processes:
        worker.join()
    optimizer_worker.kill()
    consumers.append(p)
for c in consumers:
    c.start()
comb_from_ = comb_from[0]
assert osp.exists(f'{fea_root}/{comb_from_}')
for fn in glob.glob(f'{fea_root}/{comb_from_}/facescrub/**/*.bin', recursive=True):
    fn2 = fn.replace(comb_from[0], comb_from[1])
    assert osp.exists(fn2), fn2
    fn3 = None
    # fn3 = fn.replace(comb_from[0], comb_from[2])
    dstfn = fn.replace(comb_from[0], dst_name)
    queue.put((fn, fn2, fn3, dstfn))
for ind, imgfn in enumerate(imgfns):
    if ind % 99 == 0:
        print(ind, len(imgfns))
    fn = f'{fea_root}/{comb_from[0]}/megaface/{imgfn}'
    fn2 = f'{fea_root}/{comb_from[1]}/megaface/{imgfn}'
    fn3 = f'{fea_root}/{comb_from[2]}/megaface/{imgfn}'
    fn = glob.glob(f'{fn}*.bin')[0]
    fn2 = glob.glob(f'{fn2}*.bin')[0]
    assert osp.exists(fn2), fn2
    fn3 = None
    # fn3 = glob.glob(f'{fn3}*.bin')[0]
    dstfn = fn2.replace(comb_from[1], dst_name)
    # if not osp.exists(dstfn):
    #     mkdir_p(osp.dirname(dstfn), delete=False)
    queue.put((fn, fn2, fn3, dstfn))
while not queue.empty():
    time.sleep(1)
    print('wait ...')
class MultiprocessAsyncGameExecutor(AsyncGameExecutor):
    def __init__(self, game_factory: GameExecutorFactory, network: nn.Module,
                 device: torch.device, processes: int, batches_ahead: int,
                 batch_size: int, states_on_device: bool):
        self._states_on_device = states_on_device
        self._device = device
        self._experience_queue = Queue(maxsize=processes + 1)
        block_size = max(1, batches_ahead - processes)
        self.block_buffer = []
        print('* starting %d workers (batch size: %d, block size: %d)' %
              (processes, batch_size, block_size))
        self._processes = []
        self._request_queues = []
        for i in range(processes):
            request_queue = Queue(maxsize=10)
            # Transferring to the GPU in the other process does not work: it does
            # not throw an error, but training does not converge.
            p = Process(target=_run_game,
                        args=(i, game_factory, network, device, request_queue,
                              self._experience_queue, batch_size, block_size, False))
            p.start()
            self._request_queues.append(request_queue)
            self._processes.append(p)

    def _send_to_all(self, request, block=False):
        for request_queue in self._request_queues:
            request_queue.put(request, block=block)

    def get_experiences(self):
        if len(self.block_buffer) == 0:
            block_buffer = self._experience_queue.get(block=True)
            if self._states_on_device:
                for eps, exps in block_buffer:
                    exps = [e.to_device(self._device) for e in exps]
                    self.block_buffer.append((eps, exps))
            else:
                self.block_buffer.extend(block_buffer)
        return self.block_buffer.pop()

    def update_exploration_rate(self, exploration_rate):
        self._send_to_all(_RunGameRequest(set_exploration_rate=exploration_rate),
                          block=True)

    def close(self):
        print('* shutting down workers')
        self._send_to_all(_RunGameRequest(do_terminate=True))
        # drain the experience queue to wake workers blocked on put()
        try:
            while not self._experience_queue.empty():
                try:
                    self._experience_queue.get(block=False)
                except queue.Empty:
                    pass
        except (ConnectionResetError, FileNotFoundError):
            pass
        self._experience_queue.close()
        for p in self._processes:
            p.join(1000)
        for q in self._request_queues:
            q.close()
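# The close() above drains the experience queue so that workers blocked on
# put() can wake up and see the terminate request. A minimal sketch of that
# shutdown idiom in isolation, including the errors that can surface once the
# far end of the queue's pipe has already died.
import queue
from multiprocessing import Queue

def drain_for_shutdown(q: Queue) -> None:
    try:
        while not q.empty():
            try:
                q.get(block=False)
            except queue.Empty:
                pass  # raced with another consumer; nothing to do
    except (ConnectionResetError, FileNotFoundError):
        pass  # the feeding process already exited
    q.close()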
print("Already better than target, breaking...") break r_list = [0] * pop_size # result list solutions = es.ask() # push parameters to queue for s_id, s in enumerate(solutions): for _ in range(n_samples): p_queue.put((s_id, s)) # retrieve results if args.display: pbar = tqdm(total=pop_size * n_samples) for _ in range(pop_size * n_samples): while r_queue.empty(): sleep(.1) r_s_id, r = r_queue.get() r_list[r_s_id] += r / n_samples if args.display: pbar.update(1) if args.display: pbar.close() es.tell(solutions, r_list) es.disp() # evaluation and saving if epoch % log_step == log_step - 1: best_params, best, std_best = evaluate(solutions, r_list) print("Current evaluation: {}".format(best))
def train_explorer(logdir, epochs=10, n_samples=4, pop_size=4, display=True, max_workers=10):
    results = {}
    results['best'] = []

    # multiprocessing variables
    num_workers = min(max_workers, n_samples * pop_size)
    time_limit = 1000

    # create tmp dir if non-existent and clean it if existent
    tmp_dir = join(logdir, 'tmp_exp')
    if not exists(tmp_dir):
        mkdir(tmp_dir)
    else:
        for fname in listdir(tmp_dir):
            unlink(join(tmp_dir, fname))

    # create exp dir if non-existent
    explore_dir = join(logdir, 'explore')
    if not exists(explore_dir):
        mkdir(explore_dir)

    ############################################################################
    #                           Thread routines                                #
    ############################################################################
    def slave_routine(p_queue, r_queue, e_queue, p_index):
        """Thread routine.

        Threads interact with p_queue, the parameters queue, r_queue, the
        result queue, and e_queue, the end queue. They pull parameters from
        p_queue, execute the corresponding rollout, then place the result in
        r_queue.

        Each parameter has its own unique id. Parameters are pulled as tuples
        (s_id, params) and results are pushed as (s_id, result). The same
        parameter can appear multiple times in p_queue, carrying the same id
        each time.

        As soon as e_queue is non-empty, the thread terminates.

        When multiple GPUs are involved, the assigned GPU is determined by the
        process index p_index (gpu = p_index % n_gpus).

        :args p_queue: queue containing couples (s_id, parameters) to evaluate
        :args r_queue: where to place results (s_id, results)
        :args e_queue: as soon as not empty, terminate
        :args p_index: the process index
        """
        # init routine
        gpu = p_index % torch.cuda.device_count()
        device = torch.device('cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu')

        # redirect streams
        sys.stdout = open(join(tmp_dir, str(getpid()) + '.out'), 'a')
        sys.stderr = open(join(tmp_dir, str(getpid()) + '.err'), 'a')

        with torch.no_grad():
            r_gen = RolloutGenerator(logdir, device, time_limit)
            while e_queue.empty():
                if p_queue.empty():
                    sleep(.1)
                else:
                    s_id, params = p_queue.get()
                    r_queue.put((s_id, r_gen.rollout(params)))

    ############################################################################
    #                    Define queues and start workers                       #
    ############################################################################
    p_queue = Queue()
    r_queue = Queue()
    e_queue = Queue()

    for p_index in range(num_workers):
        Process(target=slave_routine, args=(p_queue, r_queue, e_queue, p_index)).start()

    ############################################################################
    #                              Evaluation                                  #
    ############################################################################
    def evaluate(solutions, results, rollouts=100):
        """Give the current controller evaluation.

        Evaluation is minus the cumulated reward averaged over rollout runs.

        :args solutions: CMA set of solutions
        :args results: corresponding results
        :args rollouts: number of rollouts

        :returns: minus averaged cumulated reward
        """
        index_min = np.argmin(results)
        best_guess = solutions[index_min]
        restimates = []

        for s_id in range(rollouts):
            p_queue.put((s_id, best_guess))

        print("Evaluating...")
        for _ in tqdm(range(rollouts)):
            while r_queue.empty():
                sleep(.1)
            restimates.append(r_queue.get()[1])

        return best_guess, np.mean(restimates), np.std(restimates)

    ############################################################################
    #                              Launch CMA                                  #
    ############################################################################
    controller = Controller(LSIZE, RSIZE, ASIZE)  # dummy instance

    # define current best and load parameters
    cur_best = None
    ctrl_file = join(explore_dir, 'best.tar')
    print("Attempting to load previous best...")
    if exists(ctrl_file):
        state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
        cur_best = -state['reward']
        controller.load_state_dict(state['state_dict'])
        print("Previous best was {}...".format(-cur_best))

    parameters = controller.parameters()
    es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), 0.1,
                                  {'popsize': pop_size})

    epoch = 0
    log_step = 3
    while not es.stop():
        if cur_best is not None and -cur_best > target_return:
            print("Already better than target, breaking...")
            break

        r_list = [0] * pop_size  # result list
        solutions = es.ask()

        # push parameters to queue
        for s_id, s in enumerate(solutions):
            for _ in range(n_samples):
                p_queue.put((s_id, s))

        # retrieve results
        if display:
            pbar = tqdm(total=pop_size * n_samples)
        for _ in range(pop_size * n_samples):
            while r_queue.empty():
                sleep(.1)
            r_s_id, r = r_queue.get()
            r_list[r_s_id] += r / n_samples
            if display:
                pbar.update(1)
        if display:
            pbar.close()

        es.tell(solutions, r_list)
        es.disp()

        # evaluation and saving
        if epoch % log_step == log_step - 1:
            best_params, best, std_best = evaluate(solutions, r_list)
            # log the best
            results['best'].append(best)
            print("Current evaluation: {}".format(best))
            if not cur_best or cur_best > best:
                cur_best = best
                print("Saving new best with value {}+-{}...".format(-cur_best, std_best))
                load_parameters(best_params, controller)
                torch.save({'epoch': epoch,
                            'reward': -cur_best,
                            'state_dict': controller.state_dict()},
                           join(explore_dir, 'best.tar'))
            if -best > target_return:
                print("Terminating controller training with value {}...".format(best))
                break
        epoch += 1

    es.result_pretty()
    e_queue.put('EOP')
    return results
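# The bare ask/tell skeleton that train_explorer builds on, stripped of the
# worker queues. A minimal sketch assuming `pip install cma`; the sphere
# function stands in for the rollout evaluation (CMA-ES minimizes, which is
# why the code above negates rewards).
import cma
import numpy as np

es = cma.CMAEvolutionStrategy(np.zeros(8), 0.1, {'popsize': 8})
while not es.stop():
    solutions = es.ask()                                  # sample candidates
    fitness = [float(np.sum(s ** 2)) for s in solutions]  # evaluate each one
    es.tell(solutions, fitness)                           # update the search
es.result_pretty()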
class VideoProcessingPipeline(object):
    """
    Manages the acquisition and preprocessing of video frames from the webcam.

    A pipeline with two processes is used: the first process denoises frames
    and queues the result to the second process, which calculates the optical
    flows on CPU and queues back the moving average to the main process. This
    moving average is used as attention prior by the model.
    """

    def __init__(self, img_size, img_cfg, frames_window=13, flows_window=5,
                 skip_frames=2, cam_res=(640, 480), denoising=True):
        """
        :param img_size: the image input size of the neural network.
        :param img_cfg: the config parameters for image processing.
        :param frames_window: the number of webcam frames input at once into
            the neural network to make a prediction step. Best results tend to
            be obtained for roughly a bit less than one second.
        :param flows_window: the number of optical flows used to calculate an
            attention prior. Defaults to 5. Change at your own risk.
        :param skip_frames: down-sampling factor of the webcam frames. Defaults
            to 2 in order to roughly obtain 15 FPS with a 30 FPS webcam. This
            down-sampling is basic and could be improved to support ratios
            such as 2/3 to obtain 20 FPS.
        :param cam_res: webcam resolution (width, height). The application was
            only tested in 640x480. Change at your own risk.
        :param denoising: activates the denoising process. Defaults to True.
            Most useful with low-quality webcams.
        """
        if frames_window not in [9, 13, 17, 21]:
            raise ValueError('Invalid window size for webcam frames: `%s`'
                             % str(frames_window))
        if flows_window not in [3, 5, 7, 9]:
            raise ValueError('Invalid window size for optical flows: `%s`'
                             % str(flows_window))
        if flows_window > frames_window:
            raise ValueError('Optical flow window cannot be wider than camera frames window')

        self.img_size = img_size
        # optical flows can be computed in lower resolution w/o harming results
        self.opt_size = img_size // 2
        self.frames_window = frames_window
        self.flows_window = flows_window
        self.skip_frames = skip_frames
        self.total_frames = 0  # total number of frames acquired
        self.cam_res = cam_res
        self.denoising = denoising
        self.img_frames = [np.zeros((self.img_size, self.img_size, 3), dtype=np.uint8)
                           ] * (self.frames_window // 2)
        self.gray_frames = [np.zeros((self.opt_size, self.opt_size), dtype=np.uint8)
                            ] * (self.frames_window // 2)
        self.priors = []

        # init multiprocessing
        self.q_parent, self.q_prior = Queue(), Queue()

        # start denoising process
        if self.denoising:
            self.q_denoise = Queue()
            self.p_denoise = Process(target=denoise_frame,
                                     args=(self.q_denoise, self.q_prior,
                                           img_cfg.getint('h'),
                                           img_cfg.getint('template_window_size'),
                                           img_cfg.getint('search_window_size')))
            self.p_denoise.start()
            print('Denoising enabled')
        else:
            print('Denoising disabled')

        # start prior calculation process
        self.p_prior = Process(target=calc_attention_prior,
                               args=(self.opt_size, self.flows_window,
                                     self.q_prior, self.q_parent))
        self.p_prior.start()

        # initialise camera
        self.cap = cv.VideoCapture(0)
        if self.cap.isOpened():
            self.cap_fps = int(round(self.cap.get(cv.CAP_PROP_FPS)))
            self.cap.set(3, self.cam_res[0])
            self.cap.set(4, self.cam_res[1])
            print('Device @%d FPS' % self.cap_fps)
        else:
            raise IOError('Failed to open webcam capture')

        # raw images
        self.last_frame = collections.deque(maxlen=self.cap_fps)
        # cropped region of the raw images
        self.last_cropped_frame = collections.deque(maxlen=self.cap_fps)

        # acquire and preprocess the exact number of frames needed
        # to make the first prior map
        for i in range((frames_window // 2) + 1):
            self.acquire_next_frame(enable_skip=False)

        # now wait for the first prior to be returned
        while len(self.priors) == 0:
            if not self.q_parent.empty():
                # de-queue a prior
                prior, flow = self.q_parent.get(block=False)
                self.priors.append(prior)
            # sleep while the queue is empty
            time.sleep(0.01)

    def _center_crop(self, img, target_shape):
        """
        Returns a center crop of the provided image.

        :param img: the image to crop.
        :param target_shape: the dimensions of the crop.
        :return: the cropped image.
        """
        h, w = target_shape
        y, x = img.shape[:2]
        start_y = max(0, y // 2 - (h // 2))
        start_x = max(0, x // 2 - (w // 2))
        return img[start_y:start_y + h, start_x:start_x + w]

    def acquire_next_frame(self, enable_skip=True):
        """
        Reads the next frame from the webcam and starts the asynchronous
        preprocessing. The video stream is down-sampled as necessary to reach
        the desired FPS.

        :param enable_skip: enables down-sampling of the webcam stream. Must
            be True except during initialisation.
        :return: the last frame acquired, or None if that frame was skipped
            due to down-sampling of the webcam stream.
        """
        ret, frame = self.cap.read()
        if not ret:
            self.terminate()
            raise IOError('Failed to read the next frame from webcam')

        self.total_frames += 1
        if not enable_skip:
            return self._preprocess_frame(frame)
        elif (self.total_frames % self.skip_frames) == 0:
            return self._preprocess_frame(frame)
        return None

    def _preprocess_frame(self, frame):
        """
        Crops, converts to grayscale, resizes, and sends the newly acquired
        webcam frame to the preprocessing pipeline.

        :param frame: the last acquired frame.
        :return: the last acquired frame.
        """
        # crop a square at the center of the frame
        rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        rgb = self._center_crop(rgb, (self.cam_res[1], self.cam_res[1]))
        self.last_frame.append(frame)
        self.last_cropped_frame.append(rgb)
        # convert to grayscale and resize
        gray = cv.cvtColor(rgb, cv.COLOR_RGB2GRAY)
        gray = cv.resize(gray, (self.opt_size, self.opt_size))
        rgb = cv.resize(rgb, (self.img_size, self.img_size))
        # queue to relevant child process
        if self.denoising:
            self.q_denoise.put(gray)
        else:
            self.q_prior.put(gray)
        self.img_frames.append(rgb)
        self.gray_frames.append(gray)
        return frame

    def get_model_input(self, dequeue=True):
        """
        Gets the list of images and the prior needed for the inference of the
        current frame. Use `dequeue` to retrieve the next prior from the
        queue. The caller must first verify that the queue is non-empty.

        :param dequeue: must be set to True except during initialisation.
        :return: images ndarray and the corresponding prior.
        """
        # de-queue a prior
        if dequeue:
            prior, flow = self.q_parent.get(block=False)
            self.priors.append(prior)
        # ensure enough frames have been preprocessed
        n_frames = self.frames_window
        assert len(self.img_frames) >= n_frames
        assert len(self.gray_frames) >= n_frames
        assert len(self.priors) == 1

        imgs = np.stack(self.img_frames[:self.frames_window], axis=0)
        self.img_frames.pop(0)  # slide window to the right
        self.gray_frames.pop(0)
        return imgs, [self.priors.pop(0)]

    def terminate(self):
        """Terminates processes, closes queues and releases video capture."""
        if self.denoising:
            self.q_denoise.put(None)
            time.sleep(0.2)
            self.p_denoise.terminate()
        else:
            self.q_prior.put(None)
        time.sleep(0.2)
        self.p_prior.terminate()
        time.sleep(0.1)
        if self.denoising:
            self.p_denoise.join(timeout=0.5)
        self.p_prior.join(timeout=0.5)
        if self.denoising:
            self.q_denoise.close()
        self.q_parent.close()
        self.cap.release()
class PPOTrainer:
    def __init__(self, args):
        tmp_env = make_env(args.env)
        self.obs_shape = tmp_env.observation_space.shape
        self.num_actions = tmp_env.action_space.n
        self.c_in = self.obs_shape[0]
        del tmp_env

        self.horizon = args.horizon
        self.eta = args.eta
        self.epoch = args.epoch
        self.batch_size = args.batch * args.actors
        self.gamma = args.gamma
        self.lam = args.lam
        self.num_actors = args.actors
        self.eps = args.eps
        # how many times to run SGD on the buffer
        self.num_iter = (args.epoch * args.actors * args.horizon) // self.batch_size
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.queues = [Queue() for i in range(self.num_actors)]
        # This is used as a waiting mechanism, to wait for all the agents to env.step()
        self.barrier = Queue()
        self.score_channel = Queue()

        # these are shmem np.arrays
        self.state, self.reward, self.finished = self.init_shared()
        self.workers = [
            Worker(i, args.env, self.queues[i], self.barrier, self.state,
                   self.reward, self.finished, self.score_channel)
            for i in range(self.num_actors)
        ]
        self.start_workers()

        self.model = Policy(self.c_in, self.num_actions).to(self.device)
        self.optim = torch.optim.Adam(self.model.parameters(), lr=self.eta)

        # used for logging and graphing
        self.stat = {
            'scores': [],
            'steps': [],
            'clip_losses': [],
            'value_losses': [],
            'entropies': []
        }

    def init_shared(self):
        state_shape = (self.num_actors, *self.obs_shape)
        scalar_shape = (self.num_actors, 1)

        state = np.empty(state_shape, dtype=np.float32)
        state = RawArray(c_float, state.reshape(-1))
        state = np.frombuffer(state, c_float).reshape(state_shape)

        reward = np.empty(scalar_shape, dtype=np.float32)
        reward = RawArray(c_float, reward.reshape(-1))
        reward = np.frombuffer(reward, c_float).reshape(scalar_shape)

        finished = np.empty(scalar_shape, dtype=np.float32)
        finished = RawArray(c_float, finished.reshape(-1))
        finished = np.frombuffer(finished, c_float).reshape(scalar_shape)

        return state, reward, finished

    def start_workers(self):
        for worker in self.workers:
            worker.start()

    def initialize_state(self):
        for i in range(self.num_actors):
            self.queues[i].put(-1)
        self.wait_for_agents()

    @timing_wrapper
    def broadcast_actions(self, actions):
        actions = actions.cpu().numpy()
        for i in range(self.num_actors):
            self.queues[i].put(actions[i])
        self.wait_for_agents()
        next_state = torch.tensor(self.state).to(self.device)
        reward = torch.tensor(self.reward).to(self.device)
        done = torch.tensor(self.finished).to(self.device)
        return next_state, reward, done

    def wait_for_agents(self):
        for i in range(self.num_actors):
            self.barrier.get()

    def setup_scheduler(self, T_max):
        num_steps = T_max // (self.horizon * self.num_actors)
        self.scheduler = torch.optim.lr_scheduler.LambdaLR(
            self.optim, lambda x: max(1 - x / num_steps, 0))

    @timing_wrapper
    def train(self, T_max, graph_name=None):
        self.setup_scheduler(T_max)
        global_step = 0
        self.initialize_state()
        state = torch.tensor(self.state).to(self.device)
        while global_step < T_max:
            states = []
            actions = []
            rewards = []
            finished = []
            sampled_lps = []  # sampled log probabilities
            values = []

            time_start = time.time()
            duration_fwd = 0
            with torch.no_grad():
                for t in range(self.horizon):
                    global_step += self.num_actors
                    logit, value = self.model(state)
                    prob = torch.softmax(logit, dim=1)
                    log_prob = torch.log_softmax(logit, dim=1)
                    action = prob.multinomial(1)
                    sampled_lp = log_prob.gather(1, action)
                    (next_state, reward, done), duration_brdcst = self.broadcast_actions(action)

                    # appending to buffer
                    states.append(state)
                    actions.append(action)
                    rewards.append(reward)
                    finished.append(done)
                    sampled_lps.append(sampled_lp)
                    values.append(value)

                    state = next_state
                    duration_fwd += duration_brdcst

                _, V = self.model(next_state)
                values.append(V)
            time_forward = time.time()

            # GAE estimation
            GAEs, duration_GAE = self.compute_GAE(rewards, finished, values)
            duration_backward = self.run_gradient_descent(states, actions,
                                                          sampled_lps, values, GAEs)
            time_end = time.time()

            total_duration = time_end - time_start
            percent_broadcast = duration_fwd / total_duration * 100
            percent_forward = (time_forward - time_start) / total_duration * 100
            percent_GAE = duration_GAE / total_duration * 100
            percent_backward = duration_backward / total_duration * 100
            # print(f"<Time> Total: {total_duration:.2f} | forward: {percent_forward:.2f}% (broadcast {percent_broadcast:.2f}%) | GAE: {percent_GAE:.2f}% | backward: {percent_backward:.2f}%")

            if global_step % (self.num_actors * self.horizon * 30) == 0:
                while not self.score_channel.empty():
                    score, step = self.score_channel.get()
                    self.stat['scores'].append(score)
                    self.stat['steps'].append(step)
                now = datetime.datetime.now().strftime("%H:%M")
                print(f"Step {global_step} | Mean of last 10 scores: "
                      f"{np.mean(self.stat['scores'][-10:]):.2f} | Time: {now}")
                if graph_name is not None:
                    plot(global_step, self.stat, graph_name)

        # Finish
        plot(global_step, self.stat, graph_name)

    @timing_wrapper
    def compute_GAE(self, rewards, finished, values):
        GAEs = []
        advantage = 0
        for i in reversed(range(self.horizon)):
            td_error = rewards[i] + (1 - finished[i]) * self.gamma * values[i + 1] - values[i]
            advantage = td_error + (1 - finished[i]) * self.gamma * self.lam * advantage
            GAEs.append(advantage)
        GAEs = torch.cat(GAEs[::-1]).to(self.device)
        # NOTE: Below is currently not in use because I don't know how to take
        # the 'finished' tensor into account.
        # NOTE: This version is much, much faster than the python-looped version
        # above, but in terms of the total time taken it doesn't make much of a
        # difference (~2% compared to ~0.05%).
        # rewards = torch.stack(rewards)
        # finished = torch.stack(finished)
        # values = torch.stack(values)
        # td_error = rewards + (1 - finished) * self.gamma * values[1:] - values[:-1]
        # td_error = td_error.cpu()
        # GAEs = scipy.signal.lfilter([1], [1, -self.gamma * self.lam], td_error.flip(dims=(0,)), axis=0)
        # GAEs = np.flip(GAEs, axis=0)  # flip it back again
        # GAEs = GAEs.reshape(-1, GAEs.shape[-1])  # (horizon, num_actors, 1) --> (horizon * num_actors, 1)
        # GAEs = torch.tensor(GAEs).float().to(self.device)
        return GAEs

    @timing_wrapper
    def run_gradient_descent(self, states, actions, sampled_lps, values, GAEs):
        states = torch.cat(states)
        actions = torch.cat(actions)
        sampled_lps = torch.cat(sampled_lps)
        values = torch.cat(values[:-1])
        targets = GAEs + values
        self.scheduler.step()

        # Running SGD for K epochs
        for it in range(self.num_iter):
            # Batch indices
            idx = np.random.randint(0, self.horizon * self.num_actors, self.batch_size)
            state = states[idx]
            action = actions[idx]
            sampled_lp = sampled_lps[idx]
            GAE = GAEs[idx]
            value = values[idx]
            target = targets[idx]

            # Normalize advantages
            GAE = (GAE - GAE.mean()) / (GAE.std() + 1e-8)

            logit_new, value_new = self.model(state)
            # Clipped values are needed because sometimes values can unexpectedly get really big
            clipped_value_new = value + torch.clamp(value_new - value, -self.eps, self.eps)

            # Calculating policy loss
            prob_new = torch.softmax(logit_new, dim=1)
            lp_new = torch.log_softmax(logit_new, dim=1)
            entropy = -(prob_new * lp_new).sum(1).mean()

            sampled_lp_new = lp_new.gather(1, action)
            ratio = torch.exp(sampled_lp_new - sampled_lp)
            surr1 = ratio * GAE
            surr2 = torch.clamp(ratio, 1 - self.eps, 1 + self.eps) * GAE
            clip_loss = torch.min(surr1, surr2).mean()

            # Calculating value loss
            value_loss1 = (value_new - target).pow(2)
            value_loss2 = (clipped_value_new - target).pow(2)
            value_loss = 0.5 * torch.max(value_loss1, value_loss2).mean()

            final_loss = -clip_loss + value_loss - 0.01 * entropy
            self.optim.zero_grad()
            final_loss.backward()
            # total_norm = 0
            # for p in self.model.parameters():
            #     param_norm = p.grad.data.norm(2)
            #     total_norm += param_norm.item() ** 2
            # total_norm = total_norm ** (1. / 2)
            # print(total_norm)
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1)
            self.optim.step()

            # graphing
            self.stat['clip_losses'].append(clip_loss.item())
            self.stat['value_losses'].append(value_loss.item())
            self.stat['entropies'].append(entropy.item())
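# A scalar sanity check of the GAE recursion used in compute_GAE above. The
# numbers are made up purely for illustration; only the arithmetic matters.
gamma, lam = 0.99, 0.95
rewards = [1.0, 1.0, 1.0]
finished = [0.0, 0.0, 1.0]      # the episode terminates at the last step
values = [0.5, 0.6, 0.7, 0.0]   # V(s_0)..V(s_T); bootstrap value comes last

gaes, advantage = [], 0.0
for i in reversed(range(len(rewards))):
    td_error = rewards[i] + (1 - finished[i]) * gamma * values[i + 1] - values[i]
    advantage = td_error + (1 - finished[i]) * gamma * lam * advantage
    gaes.append(advantage)
gaes = gaes[::-1]  # same reordering as torch.cat(GAEs[::-1]) in the trainer
print(gaes)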
def train(experiment: int, batch: int, resume: bool):
    cfg = OthelloConfig(experiment, batch)
    manager = Manager()
    buffer = manager.list()
    replay_buffer = ReplayBuffer(buffer)
    shared_state_dicts = manager.dict()
    message_queue = Queue()
    log_queue = Queue()  # a single log is a dictionary; "gs" and "type" keys are mandatory
    writer = SummaryWriter(cfg.dir_log)
    if resume:
        print("Loading replay buffer to resume training...")
        with open(cfg.dir_replay_buffer, "rb") as f:
            buff_list = pickle.load(f)
        replay_buffer.save_training_data(buff_list)
        del buff_list
        print("Replay buffer loaded.")
    training_worker = TrainingWorker("Training Worker", message_queue, log_queue,
                                     shared_state_dicts, replay_buffer,
                                     cfg.device_name_tw, cfg, resume)
    evaluation_worker = EvaluationWorker("Evaluation Worker", message_queue, log_queue,
                                         shared_state_dicts, cfg.device_name_ew,
                                         cfg, resume)
    self_play_workers = []
    for i in range(cfg.num_self_play_workers):
        self_play_workers.append(
            SelfPlayWorker("Self-Play Worker-" + str(i), message_queue, log_queue,
                           shared_state_dicts, replay_buffer,
                           cfg.device_names_sp[i], cfg))
    print("Starting training...")
    training_worker.start()
    evaluation_worker.start()
    for worker in self_play_workers:
        worker.start()
    print("Training started.")
    try:
        while training_worker.is_alive():
            if log_queue.empty():
                time.sleep(1.0)
                continue
            log = log_queue.get()
            for k, v in log.items():
                if k in ["gs", "type"]:
                    continue
                if log["type"] == "scalar":
                    writer.add_scalar(k, v, log["gs"])
                else:
                    print("Unknown log type found:", log["type"])
            del log
    except KeyboardInterrupt:
        print("KeyboardInterrupt, stopping training...")
    finally:
        for i in range(cfg.num_self_play_workers * 5):
            message_queue.put(cfg.message_interrupt)
        training_worker.join()
        evaluation_worker.join()
        for worker in self_play_workers:
            worker.join()
        print("Saving replay buffer...")
        buff_list = list(buffer)
        with open(cfg.dir_replay_buffer, "wb") as f:
            pickle.dump(buff_list, f)
        del buff_list
        print("Replay buffer saved.")
class Synthetic(Process):
    def __init__(self, agent, dataloader, settings):
        super().__init__()
        self.agent = agent
        self.dataloader = dataloader
        self.settings = settings
        self.queue = Queue(maxsize=settings.QUEUE_LEN)
        self.put_flag = Queue(maxsize=1)
        self.get_flag = Queue(maxsize=1)
        self.done = False

    def update_settings(self, settings):
        self.settings = settings

    def update_agent(self, target_agent):
        self.agent.load_state_dict(target_agent.state_dict())

    def fetch_data(self):
        num_batch = self.settings.NUM_BATCH_WHILE_SYNTHETIC
        while self.put_flag.empty():
            out = []
            for _ in range(num_batch):
                d = self.queue.get()
                if self.queue.qsize() < num_batch:
                    self.queue.put(d)
                out.append(d)
            yield utils.cat_namedtuple_list(out, dim=0)
        # Put a signal into the flag queue
        self.get_flag.put(True)

    def run(self):
        """Generate the data queue."""
        settings = self.settings
        for d in self.dataloader:
            episode_data, episode_interpolate_ratio, episode_source_pose = [], [], []
            mesh = d["mesh"].to(settings.SYNTHETIC_DEVICE)
            raw_data = utils.variable_namedtuple(d["data"], settings.SYNTHETIC_DEVICE)
            source_pose = raw_data.init_pose
            target_pose = raw_data.target_pose
            intrinsic = raw_data.Intrinsic
            settings.set_intrinsic(intrinsic)
            for _ in range(settings.SYNTHETIC_EPISODE_LEN):
                episode_source_pose.append(source_pose)
                center_points, center_depth = utils.translation_to_voxel_and_depth(
                    source_pose.Translation.translation, intrinsic, self.settings)
                try:
                    syn_data, interpolate_ratio = self.agent.synthetic(
                        observed_image=raw_data.image,
                        observed_depth=raw_data.depth,
                        observed_mask=raw_data.mask,
                        init_pose=source_pose,
                        mesh=mesh,
                        center_points=center_points,
                        center_depth=center_depth,
                        settings=settings)
                    if settings.SYNTHETIC_EPISODE_LEN > 1:
                        state_feature, mask, flow = self.agent.state_encoding(syn_data)
                        action = self.agent.action_encoding(state_feature, interpolate_ratio)
                        source_pose = utils.apply_action_to_pose(action, source_pose, settings)
                        source_pose = utils.detach_namedtuple(source_pose)
                    episode_data.append(syn_data)
                    episode_interpolate_ratio.append(interpolate_ratio)
                except Exception as e:
                    print(e)
            if (len(episode_data) != settings.SYNTHETIC_EPISODE_LEN or
                    len(episode_interpolate_ratio) != settings.SYNTHETIC_EPISODE_LEN):
                # Something may have gone wrong while generating data
                continue
            # append data to queue
            for i in range(settings.SYNTHETIC_EPISODE_LEN):
                syn_raw_data = utils.SynRawData(
                    data=episode_data[i],
                    Intrinsic=intrinsic,
                    target_pose=target_pose,
                    init_pose=episode_source_pose[i],
                    model_points=raw_data.model_points,
                    interpolate_ratio=episode_interpolate_ratio[i])
                syn_raw_data = utils.variable_namedtuple(syn_raw_data, device="cpu")
                self.queue.put(syn_raw_data)
        # Put a signal into the flag queue
        self.put_flag.put(True)
        # Wait for the main thread to finish the last data fetch
        while self.get_flag.empty():
            time.sleep(2)
def run(args):
    p_queue = Queue()
    r_queue = Queue()
    e_queue = Queue()

    latent = 32
    mixture = 256
    size = latent + mixture
    controller = Controller(size, 3)

    for i in range(args.max_workers):
        Process(target=slave_routine,
                args=(p_queue, r_queue, e_queue, i, args.logdir)).start()

    cur_best = None
    savefile = args.logdir / 'best.tar'
    if savefile.exists():
        print(f'Loading from {savefile}')
        state = torch.load(savefile.as_posix(), map_location={'cuda:0': 'cpu'})
        cur_best = -state['reward']
        controller.load_state_dict(state['state_dict'])

    parameters = controller.parameters()
    sigma = 0.1
    es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), sigma,
                                  {'popsize': args.pop_size})
    epoch = 0
    while not es.stop():
        if cur_best is not None and -cur_best > args.target_return:
            print('Already better than target, breaking...')
            break

        r_list = [0] * args.pop_size  # result list
        solutions = es.ask()

        # push parameters to queue
        for s_id, s in enumerate(solutions):
            for _ in range(args.n_samples):
                p_queue.put((s_id, s))

        # retrieve results
        if args.display:
            pbar = tqdm(total=args.pop_size * args.n_samples)
        for _ in range(args.pop_size * args.n_samples):
            while r_queue.empty():
                sleep(.1)
            r_s_id, r = r_queue.get()
            r_list[r_s_id] += r / args.n_samples
            if args.display:
                pbar.update(1)
        if args.display:
            pbar.close()

        es.tell(solutions, r_list)
        es.disp()

        # CMA-ES seeks to minimize, so we multiply the reward obtained
        # in a rollout by -1.
        best_params, best, std_best = evaluate(solutions, r_list, p_queue, r_queue)
        if (not cur_best) or (cur_best > best):
            cur_best = best
            print(f'Saving new best with value {-cur_best}+-{std_best}')
            load_parameters(best_params, controller)
            torch.save({'epoch': epoch,
                        'reward': -cur_best,
                        'state_dict': controller.state_dict()},
                       savefile)
        # Save after every epoch
        torch.save(controller.state_dict(), f'{controller_pt}')
        if -best > args.target_return:
            print(f'Terminating controller training with value {best}...')
            break
        epoch += 1

    es.result_pretty()
    e_queue.put('EOP')
    0] * pop_size  # result list, like np.zeros(pop_size).tolist()
solutions = es.ask()

# push parameters to queue
for s_id, s in enumerate(solutions):
    for _ in range(n_samples):
        p_queue.put((s_id, s))

# This slave call is stealing the data the other slave calls need..
if epoch % log_step != 0:
    slave_routine()  # fill r_queue from p_queue, WHICH IS FROM ABOVE
    # print("we just put something in p_queue")

while not r_queue.empty():
    # print("We are in this loop?")
    result_list_idx, r = r_queue.get()
    try:
        result_list[result_list_idx] += r / n_samples
        # print('r_queue is not empty', result_list)
    except Exception as e:
        print(f'result_list_idx is {result_list_idx}')
        print(f'Caught error. {e}')

es.tell(solutions, result_list)
es.disp()

# evaluation and saving
if epoch % log_step == 0:
    slave_routine(
        a_dim,
        g_net,
        g_opt,
        update_iter=10,
        is_render=is_render,
        use_cuda=use_cuda)
    # (self, env_id, idx, child_conn, queue, s_dim, a_dim, g_net, g_opt, update_iter=10, is_render=False, use_cuda=False):
    worker.start()
    workers.append(worker)
    parent_conns.append(parent_conn)

g_episode = 0
g_step = 0
while g_episode < max_episode:
    while queue.empty():  # wait for a worker's state
        continue
    # received some data
    idx, command, parameter = queue.get()
    if command == "Result":
        episode, step, reward, x_pos = parameter
        g_episode += 1
        g_step += step
        print('[ Worker %2d ] ' % (idx), end='')
        print("Episode : %5d\tStep : %5d\tReward : %5d\t\tX_pos : %5d" %
              (g_episode, g_step, reward, x_pos))
        writer.add_scalar('perf/x_pos', x_pos, g_step)
if __name__ == '__main__':
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    result_queue = Queue()
    x = []
    sample_num = 64
    for t_step in range(500, 20001, 500):
        workers = []
        for _ in range(8):
            worker = Worker(result_queue, t_step)
            worker.start()
            workers.append(worker)
        seen = 0
        while seen < sample_num:
            if not result_queue.empty():
                profit, max_drawdown = result_queue.get()
                x.append([t_step, profit, max_drawdown])
                print(t_step, profit, max_drawdown)
                seen += 1
        for worker in workers:
            worker.join()
    df = pd.DataFrame(np.array(x), columns=['total_step', 'profit', 'max_drawdown'])
    df.to_csv('draw-profit-vs-step-arg-max.csv')
class AsyncLogger(Logger):
    @staticmethod
    def log_fn(self, stop_event: Event):
        try:
            self._super_create_loggers()
            self.response_queue.put({k: self.__dict__[k]
                                     for k in ["save_dir", "tb_logdir", "is_sweep"]})
            while True:
                try:
                    cmd = self.draw_queue.get(True, 0.1)
                except EmptyQueue:
                    if stop_event.is_set():
                        break
                    else:
                        continue
                self._super_log(*cmd)
                self.response_queue.put(True)
        except Exception:
            print("Logger process crashed.")
            raise
        finally:
            print("Logger: syncing")
            if self.use_wandb:
                wandb.join()
            stop_event.set()
            print("Logger process terminating...")

    def create_loggers(self):
        self._super_create_loggers = super().create_loggers
        self.stop_event = Event()
        self.proc = Process(target=self.log_fn, args=(self, self.stop_event))
        self.proc.start()
        atexit.register(self.finish)

    def __init__(self, *args, **kwargs):
        self.queue = []
        self.draw_queue = Queue()
        self.response_queue = Queue()
        self._super_log = super().log
        self.waiting = 0
        super().__init__(*args, **kwargs)
        self.__dict__.update(self.response_queue.get(True))

    def log(self, plotlist, step=None):
        if self.stop_event.is_set():
            return
        if not isinstance(plotlist, list):
            plotlist = [plotlist]
        plotlist = [p for p in plotlist if p]
        if not plotlist:
            return
        plotlist = U.apply_to_tensors(plotlist, lambda x: x.detach().cpu())
        self.queue.append((plotlist, step))
        self.flush(wait=False)

    def enqueue(self, data, step: Optional[int]):
        self.draw_queue.put((data, step))
        self.waiting += 1

    def wait_logger(self, wait=False):
        cond = (lambda: not self.response_queue.empty()) if not wait else (lambda: self.waiting > 0)
        already_printed = False
        while cond() and not self.stop_event.is_set():
            will_wait = self.response_queue.empty()
            if will_wait and not already_printed:
                already_printed = True
                sys.stdout.write("Warning: waiting for logger... ")
                sys.stdout.flush()
            try:
                self.response_queue.get(True, 0.2)
            except EmptyQueue:
                continue
            self.waiting -= 1
        if already_printed:
            print("done.")

    def flush(self, wait: bool = True):
        while self.queue:
            plotlist, step = self.queue[0]
            for i, p in enumerate(plotlist):
                if isinstance(p, PlotAsync):
                    res = p.get(wait)
                    if res is not None:
                        plotlist[i] = res
                    else:
                        if wait:
                            assert p.failed  # exception in the worker thread
                            print("Exception detected in a PlotAsync object. "
                                  "Syncing logger and ignoring further plots.")
                            self.wait_logger(True)
                            self.stop_event.set()
                            self.proc.join()
                        return
            self.queue.pop(0)
            self.enqueue(plotlist, step)
        self.wait_logger(wait)

    def finish(self):
        if self.stop_event.is_set():
            return
        self.flush(True)
        self.stop_event.set()
        self.proc.join()
def learn(
    self,
    total_timesteps: int,
    callback: MaybeCallback = None,
    log_interval: int = 4,
    eval_env: Optional[GymEnv] = None,
    eval_freq: int = -1,
    n_eval_episodes: int = 5,
    tb_log_name: str = "run",
    eval_log_path: Optional[str] = None,
    reset_num_timesteps: bool = True,
) -> "OffPolicyAlgorithm":
    total_timesteps, callback = self._setup_learn(
        total_timesteps, eval_env, callback, eval_freq, n_eval_episodes,
        eval_log_path, reset_num_timesteps, tb_log_name)
    callback.on_training_start(locals(), globals())

    # train vae
    print("Train VAE...")
    while self.num_timesteps < total_timesteps:
        rollout = self.collect_rollouts(
            self.env,
            train_freq=self.train_freq,
            action_noise=self.action_noise,
            callback=callback,
            learning_starts=self.learning_starts,
            replay_buffer=self.replay_buffer,
            log_interval=log_interval,
        )
        if rollout.continue_training is False:
            break
        if self.num_timesteps > 0 and self.num_timesteps > self.learning_starts:
            # If no `gradient_steps` is specified, do as many gradient steps
            # as steps performed during the rollout
            print("T VAE")
            gradient_steps = self.gradient_steps if self.gradient_steps > 0 else rollout.episode_timesteps
            self.train_vae(batch_size=self.batch_size, gradient_steps=gradient_steps)

    # train mdnrnn
    print("Train MDNRNN...")
    self.replay_buffer = ReplayBufferAD(
        self.buffer_size,
        self.observation_space,
        self.action_space,
        self.device,
        optimize_memory_usage=self.optimize_memory_usage,
    )
    total_timesteps = 30
    while self.num_timesteps < total_timesteps:
        rollout = self.collect_rollouts(
            self.env,
            train_freq=self.train_freq,
            action_noise=self.action_noise,
            callback=callback,
            learning_starts=self.learning_starts,
            replay_buffer=self.replay_buffer,
            log_interval=log_interval,
        )
        if rollout.continue_training is False:
            break
        if self.num_timesteps > 0 and self.num_timesteps > self.learning_starts:
            # If no `gradient_steps` is specified, do as many gradient steps
            # as steps performed during the rollout
            print("T MDNRNN")
            gradient_steps = self.gradient_steps if self.gradient_steps > 0 else rollout.episode_timesteps
            self.train_mdnrnn(batch_size=self.batch_size, gradient_steps=gradient_steps)

    # train controller
    print("Train Controller...")
    p_queue = Queue()
    r_queue = Queue()
    e_queue = Queue()
    num_workers = 16
    for p_index in range(num_workers):
        Process(target=self.slave_routine,
                args=(p_queue, r_queue, e_queue, p_index)).start()

    cur_best = None
    parameters = self.controller.parameters()
    es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), 0.1, {'popsize': 4})

    epoch = 0
    log_step = 3
    while not es.stop():
        if cur_best is not None and -cur_best > 950:
            print("Already better than target, breaking...")
            break

        r_list = [0] * 4  # result list
        solutions = es.ask()

        # push parameters to queue
        i = 0
        for s_id, s in enumerate(solutions):
            for _ in range(4):
                i += 1
                p_queue.put((s_id, s))

        # retrieve results
        for _ in range(16):
            while r_queue.empty():
                sleep(.1)
            r_s_id, r = r_queue.get()
            r_list[r_s_id] += r / 4

        es.tell(solutions, r_list)
        es.disp()

        # evaluation and saving
        if epoch % log_step == log_step - 1:
            best_params, best, std_best = self.evaluate(p_queue, r_queue,
                                                        solutions, r_list)
            print("Current evaluation: {}".format(best))
            if not cur_best or cur_best > best:
                cur_best = best
                print("Saving new best with value {}+-{}...".format(-cur_best, std_best))
                load_parameters(best_params, self.controller)
            if -best > 950:
                print("Terminating controller training with value {}...".format(best))
                break
        epoch += 1

    es.result_pretty()
    e_queue.put('EOP')
    callback.on_training_end()
    return self
class StatProcess(Process):
    def __init__(self, *args):
        """
        Statistics process that saves the statistics obtained from workers.

        In particular, the shared models are saved every
        Config.MODEL_SAVE_FREQUENCY episodes. Moreover, some statistics are
        logged every Config.LOG_STATS_FREQUENCY episodes.
        """
        super(StatProcess, self).__init__()
        self.episode_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
        self.ae_loss_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
        self.episode_count = Value('i', 0)
        self.model_save = Value('i', 0)
        self.exit_flag = Value('i', 0)
        #: obj:`dict`: Dictionary of DPS models for RL.
        self.agents = {}
        for model, env_id in zip(args, Config.ENV_IDS):
            self.agents[env_id] = model
        #: float: Time at start, for logging.
        self._start_time = time.time()

    def run(self):
        """
        Runs the statistics process.

        (i) Gets statistics from the shared-memory queue. If the process
            cannot find data for some time, it may time out.
        (ii) Saves statistics to file.
        (iii) Increments the episode count.
        (iv) Communicates to the server that the model may be saved after n episodes.
        (v) Logs current episode statistics after m episodes.
        """
        print('Start gathering statistics.')
        sys.stdout.flush()
        with open(Config.RESULTS_FILE, 'a') as results_logger, \
                open(Config.RESULTS_LOSS_FILE, 'a') as loss_logger, \
                open(Config.SELECTION_FILE, 'a') as select_logger:
            while True:
                # (i) Get statistics. Ignore errors when exiting.
                try:
                    if Config.TRAIN_MODE == 'policy':
                        # Get episode log.
                        episode_time, env_id, \
                            total_reward, length = self.episode_log_q.get(timeout=Config.WAIT_STATS_INTERRUPT)
                    loss_q_empty = self.ae_loss_log_q.empty()
                    if Config.TRAIN_MODE == 'selection' and not loss_q_empty:
                        # Get loss log.
                        training_time, loss_type, env_id_loss, \
                            loss, training_count = self.ae_loss_log_q.get(timeout=Config.WAIT_STATS_INTERRUPT)
                        self.episode_count.value += 1
                except (FileNotFoundError, ConnectionResetError) as error:
                    if self.exit_flag.value:
                        warnings.warn(
                            f'Ignored error in statistics while trying to close: {error}')
                    else:
                        raise error

                # (ii) Save statistics.
                if Config.TRAIN_MODE == 'policy':
                    # Save episode log.
                    results_logger.write('%s, %s, %10.4f, %d\n' %
                                         (episode_time.strftime("%Y-%m-%d %H:%M:%S"),
                                          env_id, total_reward, length))
                    results_logger.flush()
                if Config.TRAIN_MODE == 'selection' and not loss_q_empty:
                    # Save loss log.
                    loss_logger.write('%s, %s, %s, %d, %10.8f\n' %
                                      (training_time.strftime("%Y-%m-%d %H:%M:%S"),
                                       loss_type, env_id_loss, training_count, loss))
                    loss_logger.flush()
                if (Config.TRAIN_MODE == 'selection'
                        and self.episode_count.value % Config.SELECTION_SAVE_FREQUENCY == 0
                        and self.episode_count.value != 0 and not loss_q_empty):
                    # Save selection log.
                    for env_id in Config.ENV_IDS:
                        selection = self.agents[env_id].selection.selectors.data.tolist()
                        select_logger.write('%s, %s\n' % (env_id, str(selection)))
                    select_logger.flush()

                # (iii) Increment episode count.
                if Config.TRAIN_MODE == 'policy':
                    self.episode_count.value += 1

                # (iv) Tell the server to save the model.
                if Config.SAVE_MODELS and self.episode_count.value % Config.MODEL_SAVE_FREQUENCY == 0:
                    self.model_save.value = 1

                # (v) Log some statistics.
                if Config.TRAIN_MODE == 'policy' and self.episode_count.value % Config.LOG_STATS_FREQUENCY == 0:
                    print('[ Time: %8d ] '
                          '[ Environment type: %5s ] '
                          '[ Episode #%8d with total Score %10.4f and length %8d. ]' %
                          (int(time.time() - self._start_time), env_id,
                           self.episode_count.value, total_reward, length))
                if Config.TRAIN_MODE == 'selection' and not loss_q_empty:
                    print('[ Training #%12d ] '
                          '[ Episode #%8d ] '
                          '[ Loss for type: %6s ] '
                          '[ Trainer for type: %5s ] '
                          '[ Loss: %10.8f. ]' %
                          (training_count, self.episode_count.value,
                           loss_type, env_id_loss, loss))
                sys.stdout.flush()
        print('Statistics have been closed.')
        sys.stdout.flush()
def _call_mods_from_fast5s_cpu2(motif_seqs, chrom2len, fast5s_q, len_fast5s,
                                positions, model_path, success_file, args):
    # features_batch_q = mp.Queue()
    # errornum_q = mp.Queue()
    features_batch_q = Queue()
    errornum_q = Queue()

    # pred_str_q = mp.Queue()
    pred_str_q = Queue()

    nproc = args.nproc
    nproc_call_mods = nproc_to_call_mods_in_cpu_mode
    if nproc <= nproc_call_mods + 1:
        nproc = nproc_call_mods + 1 + 1

    fast5s_q.put("kill")

    features_batch_procs = []
    for _ in range(nproc - nproc_call_mods - 1):
        p = mp.Process(target=_read_features_fast5s_q,
                       args=(fast5s_q, features_batch_q, errornum_q,
                             motif_seqs, chrom2len, positions, args))
        p.daemon = True
        p.start()
        features_batch_procs.append(p)

    call_mods_gpu_procs = []
    for _ in range(nproc_call_mods):
        p_call_mods_gpu = mp.Process(target=_call_mods_q,
                                     args=(model_path, features_batch_q,
                                           pred_str_q, success_file, args))
        p_call_mods_gpu.daemon = True
        p_call_mods_gpu.start()
        call_mods_gpu_procs.append(p_call_mods_gpu)

    # print("write_process started..")
    p_w = mp.Process(target=_write_predstr_to_file,
                     args=(args.result_file, pred_str_q))
    p_w.daemon = True
    p_w.start()

    errornum_sum = 0
    while True:
        running = any(p.is_alive() for p in features_batch_procs)
        while not errornum_q.empty():
            errornum_sum += errornum_q.get()
        if not running:
            break

    for p in features_batch_procs:
        p.join()
    features_batch_q.put("kill")

    for p_call_mods_gpu in call_mods_gpu_procs:
        p_call_mods_gpu.join()

    # print("finishing the write_process..")
    pred_str_q.put("kill")
    p_w.join()

    print("%d of %d fast5 files failed.." % (errornum_sum, len_fast5s))
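# The function above chains worker stages with "kill" sentinels
# (fast5s_q -> features_batch_q -> pred_str_q). A minimal sketch of the
# pattern in isolation: the producer appends the sentinel after its data, and
# the consumer exits cleanly when it sees it.
from multiprocessing import Process, Queue

SENTINEL = "kill"

def producer(q: Queue, n: int) -> None:
    for i in range(n):
        q.put(i)
    q.put(SENTINEL)  # signal that no more data is coming

def consumer(q: Queue) -> None:
    while True:
        item = q.get()
        if item == SENTINEL:
            break  # producer is done; exit cleanly
        print('got', item)

if __name__ == '__main__':
    q = Queue()
    p = Process(target=producer, args=(q, 3))
    c = Process(target=consumer, args=(q,))
    p.start(); c.start()
    p.join(); c.join()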
def train():
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    writer = SummaryWriter()

    s2 = S2(latent_num, cnn_chanel_num, stat_dim).to(device).share_memory()
    writer.add_graph(
        s2, (torch.zeros([1, 1, img_shape[0], img_shape[1]]).to(device),
             torch.zeros([1, stat_dim]).to(device)))
    optim = GlobalAdam([{'params': s2.encode_img.parameters()},
                        {'params': s2.encode_stat.parameters()},
                        {'params': s2.pi.parameters()},
                        {'params': s2.actor.parameters()}],
                       lr=1e-2, weight_decay=0.01)
    if os.path.exists('S2_state_dict.pt'):
        s2.load_state_dict(torch.load('S2_state_dict.pt'))
        optim.load_state_dict(torch.load('S2_Optim_state_dict.pt'))

    pair_queue = Queue(10000)
    validate_queue = Queue()
    optimizer_lock = Lock()
    process = []
    data_list = [
        'A8888.XDCE', 'AL8888.XSGE', 'AU8888.XSGE', 'C8888.XDCE',
        'M8888.XDCE', 'RU8888.XSGE', 'SR8888.XZCE'
    ]
    # Cap the worker count at the number of data files; the original
    # range(mp.cpu_count() - 1) indexes past data_list on machines with more
    # than eight cores.
    for no in range(min(mp.cpu_count() - 1, len(data_list))):
        data = pd.read_csv(f"../data/{data_list[no]}_5m.csv")
        worker = Worker_Generator(no, data, pair_queue)
        worker.start()
        process.append(worker)
    validater = Validate(s2, optimizer_lock, validate_queue)
    validater.start()

    epochs = 0
    while True:
        # Block until a full minibatch of (image, stats, category) pairs has
        # been collected from the generator workers.
        imgs, stats, cates = [], [], []
        seen = 0
        while seen < minibatch:
            img, stat, cate = pair_queue.get()
            imgs.append(img)
            stats.append(stat)
            cates.append(cate)
            seen += 1
        imgs = torch.tensor(imgs).float().to(device)
        stats = torch.tensor(stats).float().to(device)
        g_t = torch.tensor(cates).long().to(device)

        pred = s2(imgs, stats)
        loss = F.cross_entropy(pred, g_t)
        accr = (pred.argmax(1) == g_t).sum().item() / minibatch
        with optimizer_lock:
            optim.zero_grad()
            loss.backward()
            optim.step()

        if not validate_queue.empty():
            val_reward, val_money, val_win = validate_queue.get()
            writer.add_scalar('Validate/reward', val_reward, epochs)
            writer.add_scalar('Validate/money', val_money, epochs)
            writer.add_scalar('Validate/win_rate', val_win, epochs)
        writer.add_scalar('Train/Loss', loss.item(), epochs)
        writer.add_scalar('Train/Accr', accr, epochs)
        epochs += 1
        if epochs % save_every == 0:
            torch.save(s2.state_dict(), 'S2_state_dict.pt')
            torch.save(optim.state_dict(), 'S2_Optim_state_dict.pt')

    # Unreachable while the loop above runs forever; kept in case a break
    # condition is ever added.
    for worker in process:
        worker.join()
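# train() combines share_memory() with a lock around the optimizer step so
# other processes (here, Validate) always read a consistent model. A minimal
# sketch of that pattern, reduced to a single linear layer; GlobalAdam is
# assumed to be an Adam variant with shared state, so plain Adam stands in.
import torch
import torch.nn as nn
import torch.multiprocessing as mp

def _learner(model, lock, steps=100):
    opt = torch.optim.Adam(model.parameters(), lr=1e-2)
    x = torch.randn(32, 4)
    y = torch.randn(32, 1)
    for _ in range(steps):
        loss = nn.functional.mse_loss(model(x), y)
        with lock:                # serialize the update, as in train()
            opt.zero_grad()
            loss.backward()
            opt.step()

if __name__ == "__main__":
    model = nn.Linear(4, 1)
    model.share_memory()          # updates land in shared parameter storage
    lock = mp.Lock()
    workers = [mp.Process(target=_learner, args=(model, lock)) for _ in range(2)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()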
        break

    r_list = [0] * pop_size  # result list
    solutions = es.ask()

    # push parameters to queue
    for s_id, s in enumerate(solutions):
        for _ in range(n_samples):
            p_queue.put((s_id, s))

    # retrieve results
    if args.display:
        pbar = tqdm(total=pop_size * n_samples)
    for _ in range(pop_size * n_samples):
        while r_queue.empty():
            sleep(.1)
        r_s_id, r = r_queue.get()
        r_list[r_s_id] += r / n_samples
        if args.display:
            pbar.update(1)
    if args.display:
        pbar.close()

    es.tell(solutions, r_list)
    es.disp()

    # evaluation and saving
    if epoch % log_step == log_step - 1:
        best_params, best, std_best = evaluate(solutions, r_list)
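# The `while r_queue.empty(): sleep(.1)` poll above can be replaced by a
# blocking get with a timeout. A sketch under the same (s_id, reward) tuple
# convention; drain_results is an illustrative name, and Empty is the
# standard-library timeout exception raised by multiprocessing queues.
from queue import Empty

def drain_results(r_queue, r_list, total, n_samples, timeout=30.0):
    for _ in range(total):
        try:
            r_s_id, r = r_queue.get(timeout=timeout)  # blocks instead of polling
        except Empty:
            break                                     # give up if workers stalled
        r_list[r_s_id] += r / n_samples
    return r_list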
def controller_train_proc(ctrl_dir, controller, vae, mdrnn,
                          target_return=950, skip_train=False, display=True):
    step_log('4-2. controller_train_proc START!!')

    # define current best and load parameters
    cur_best = None
    if not os.path.exists(ctrl_dir):
        os.mkdir(ctrl_dir)
    ctrl_file = os.path.join(ctrl_dir, 'best.tar')

    p_queue = Queue()
    r_queue = Queue()
    # e_queue = Queue()  # pipaek: not necessary if not multiprocessing

    print("Attempting to load previous best...")
    if os.path.exists(ctrl_file):
        # state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
        state = torch.load(ctrl_file)
        cur_best = -state['reward']
        controller.load_state_dict(state['state_dict'])
        print("Previous best was {}...".format(-cur_best))

    if skip_train:
        return  # pipaek: for when you want to skip improving the model via training..

    def evaluate(solutions, results, rollouts=100):  # pipaek: rollouts 100 -> 10, originally 100
        """
        Give current controller evaluation. Evaluation is minus the cumulated
        reward averaged over rollout runs.

        :args solutions: CMA set of solutions
        :args results: corresponding results
        :args rollouts: number of rollouts

        :returns: minus averaged cumulated reward
        """
        index_min = np.argmin(results)
        best_guess = solutions[index_min]
        restimates = []

        for s_id in range(rollouts):
            print('p_queue.put(), s_id=%d' % s_id)
            p_queue.put((s_id, best_guess))

        print('>>>rollout_routine!!')
        rollout_routine()  # pipaek: here too, process right after p_queue.put..

        print(">>>Evaluating...")
        for _ in tqdm(range(rollouts)):
            # while r_queue.empty():
            #     sleep(.1)  # pipaek: not needed, since this is not multi-process
            if not r_queue.empty():  # pipaek: 20180718 check to avoid getting stuck in r_queue.get()!!
                r_s_id, r = r_queue.get()
                print('in evaluate r_queue.get() r_s_id=%d, r_queue remain=%d'
                      % (r_s_id, r_queue.qsize()))
                restimates.append(r)
            else:
                print('r_queue.empty() -> break!!')
                break

        return best_guess, np.mean(restimates), np.std(restimates)

    def rollout_routine():
        """
        Thread routine.

        Threads interact with p_queue, the parameters queue, r_queue, the
        result queue and e_queue the end queue. They pull parameters from
        p_queue, execute the corresponding rollout, then place the result in
        r_queue.

        Each parameter has its own unique id. Parameters are pulled as tuples
        (s_id, params) and results are pushed as (s_id, result). The same
        parameter can appear multiple times in p_queue, displaying the same
        id each time.

        As soon as e_queue is non empty, the thread terminates.

        When multiple gpus are involved, the assigned gpu is determined by
        the process index p_index (gpu = p_index % n_gpus).

        :args p_queue: queue containing couples (s_id, parameters) to evaluate
        :args r_queue: where to place results (s_id, results)
        :args e_queue: as soon as not empty, terminate
        :args p_index: the process index
        """
        # init routine
        # gpu = p_index % torch.cuda.device_count()
        # device = torch.device('cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu')

        # redirect streams
        # if not os.path.exists(tmp_dir):
        #     os.mkdir(tmp_dir)
        # sys.stdout = open(os.path.join(tmp_dir, 'rollout.out'), 'a')
        # sys.stderr = open(os.path.join(tmp_dir, 'rollout.err'), 'a')

        with torch.no_grad():
            r_gen = RolloutGenerator(vae, mdrnn, controller, device,
                                     rollout_time_limit)
            while not p_queue.empty():
                print('in rollout_routine, p_queue.get()')
                s_id, params = p_queue.get()
                print('r_queue.put() sid=%d' % s_id)
                r_queue.put((s_id, r_gen.rollout(params)))
                print('r_gen.rollout OK, r_queue.put()')

    parameters = controller.parameters()
    es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), 0.1,
                                  {'popsize': C_POP_SIZE})
    print("CMAEvolutionStrategy start OK!!")

    epoch = 0
    log_step = 3
    while not es.stop():
        print("--------------------------------------")
        print("CURRENT EPOCH = %d" % epoch)
        if cur_best is not None and -cur_best > target_return:
            print("Already better than target, breaking...")
            break

        r_list = [0] * C_POP_SIZE  # result list
        solutions = es.ask()
        print("CMAEvolutionStrategy-ask")

        # push parameters to queue
        for s_id, s in enumerate(solutions):  # pipaek: this for loop repeats C_POP_SIZE times.
            for _ in range(C_N_SAMPLES):
                print('in controller_train_proc p_queue.put() s_id : %d' % s_id)
                p_queue.put((s_id, s))
            rollout_routine()  # pipaek: right after p_queue.put, get immediately, roll out, then put the result into r_queue.
            print("rollout_routine OK, r_queue size=%d" % r_queue.qsize())

        # retrieve results
        if display:
            pbar = tqdm(total=C_POP_SIZE * C_N_SAMPLES)
        while not r_queue.empty():  # pipaek: 20180718 changed the for loop to a while so we never hang forever on r_queue.get here.
            try:
                r_s_id, r = r_queue.get()
                print('in controller_train_proc r_queue.get() r_s_id=%d, r_queue remain=%d'
                      % (r_s_id, r_queue.qsize()))
                r_list[r_s_id] += r / C_N_SAMPLES
                if display:
                    pbar.update(1)
            except IndexError:
                print('IndexError during r_queue.get()')
                print('cur r_list size:%d, index:%d' % (len(r_list), r_s_id))
        if display:
            pbar.close()

        es.tell(solutions, r_list)  # pipaek: feed the r_list results back into the solution set..
        es.disp()

        # evaluation and saving
        if epoch % log_step == log_step - 1:
            print(">>>> TRYING EVALUATION, CURRENT EPOCH = %d" % epoch)
            best_params, best, std_best = evaluate(
                solutions, r_list, rollouts=100)  # pipaek: do only 10 rollouts for evaluation.. originally 100
            print("Current evaluation: {}".format(best))
            if not cur_best or cur_best > best:
                cur_best = best
                print("Saving new best with value {}+-{}...".format(-cur_best, std_best))
                load_parameters(best_params, controller)
                torch.save({'epoch': epoch,
                            'reward': -cur_best,
                            'state_dict': controller.state_dict()},
                           os.path.join(ctrl_dir, 'best.tar'))
            if -best > target_return:
                print("Terminating controller training with value {}...".format(best))
                break

        epoch += 1

    print("es.stop!!")
    es.result_pretty()
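# flatten_parameters() and load_parameters() are used above but not defined
# in this snippet. A plausible minimal implementation is sketched below,
# assuming the usual world-models convention of mapping the controller's
# parameters to and from one flat vector for CMA-ES; these exact bodies are
# an assumption, not the original code.
import torch

def flatten_parameters(params):
    """Concatenate all parameters into a single flat numpy vector for CMA-ES."""
    return torch.cat([p.detach().view(-1) for p in params]).cpu().numpy()

def load_parameters(flat, controller):
    """Copy a flat vector produced by CMA-ES back into the controller."""
    flat = torch.tensor(flat, dtype=torch.float32)
    idx = 0
    for p in controller.parameters():
        n = p.numel()
        p.data.copy_(flat[idx:idx + n].view_as(p))
        idx += n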