def create(cls):
    if not hasattr(cls, 'length_to_eps'):
        # Maps episode length to list of episodes
        cls.length_to_eps = {}
    if not hasattr(cls, 'ep_indices'):
        # Set of episode indices already in the cache
        cls.ep_indices = set()
    if not hasattr(cls, 'batches'):
        # List of batches if popping batches
        cls.batches = []
    if not hasattr(cls, 'load_complete'):
        # If all episodes have been loaded into memory
        cls.load_complete = Value(ctypes.c_bool, False)
    if not hasattr(cls, 'batches_lock'):
        # Lock to access batches
        cls.batches_lock = Lock()
    if not hasattr(cls, 'cache_lock'):
        # Lock to access length_to_eps
        cls.cache_lock = Lock()
    if not hasattr(cls, 'fill_cache_lock'):
        # Lock for condition variables
        cls.fill_cache_lock = RLock()
    if not hasattr(cls, 'add_to_cache_cv'):
        # Condition notifying Loader to add to cache
        cls.add_to_cache_cv = Condition(lock=cls.fill_cache_lock)
    if not hasattr(cls, 'cache_filled_cv'):
        # Condition notifying teacher that cache has episodes
        cls.cache_filled_cv = Condition(lock=cls.fill_cache_lock)
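# A hedged sketch of how two condition variables sharing one RLock, as created above, are
# typically used together; this is an illustration of the pattern, not necessarily the
# surrounding project's loader code. The consumer waits on cache_filled_cv and pokes the
# loader through add_to_cache_cv while holding the shared fill_cache_lock.
def wait_for_episodes(cls):
    with cls.cache_filled_cv:              # acquires cls.fill_cache_lock
        while not cls.length_to_eps:
            cls.add_to_cache_cv.notify()   # ask the Loader to add more episodes
            cls.cache_filled_cv.wait()     # release the lock and sleep until notified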
def __init__(self, keys: List[Union[int, str]], zero_obs: dict,
             device: torch.device, out_queue: Queue, recorder: Recorder,
             trajectory_length: int = 128):
    # ATTRIBUTES
    self.out_queue = out_queue
    self.device = device
    self.recorder = recorder
    self._trajectory_length = trajectory_length
    self.zero_obs = {k: v.to(self.device) for k, v in zero_obs.items()}
    self.zero_obs['episode_id'] = torch.ones((1, 1), device=self.device) * -1
    self.zero_obs['prev_episode_id'] = torch.ones((1, 1), device=self.device) * -1
    self._reset_states(self.zero_obs)

    # Counters
    self._trajectory_counter = 0
    self._episode_counter = len(keys)

    # Setup storage
    self._internal_store = {k: self._new_trajectory() for k in keys}
    self._episode_id_store = {k: i for i, k in enumerate(keys)}
    self._locks_trajectories = {k: Lock() for k in keys}
    self._lock_episode_counter = Lock()
def __init__(self, config: ParamDict, environment: Environment, policy: Policy, filter_op: Filter): threads, gpu = config.require("threads", "gpu") threads_gpu = config["gpu threads"] if "gpu threads" in config else 2 super(Agent_async, self).__init__(config, environment, policy, filter_op) # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update self._sync_signal = Value('i', 0) # environment sub-process list self._environment_proc = [] # policy sub-process list self._policy_proc = [] # used for synchronize policy parameters self._param_pipe = None self._policy_lock = Lock() # used for synchronize roll-out commands self._control_pipe = None self._environment_lock = Lock() step_pipe = [] cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True) param_pipe_child, param_pipe_parent = Pipe(duplex=False) self._control_pipe = cmd_pipe_parent self._param_pipe = param_pipe_parent for i_envs in range(threads): child_name = f"environment_{i_envs}" step_pipe_pi, step_pipe_env = Pipe(duplex=True) step_lock = Lock() worker_cfg = ParamDict({ "seed": self.seed + 1024 + i_envs, "gpu": gpu }) child = Process(target=Agent_async._environment_worker, name=child_name, args=(worker_cfg, cmd_pipe_child, step_pipe_env, self._environment_lock, step_lock, self._sync_signal, deepcopy(environment), deepcopy(filter_op))) self._environment_proc.append(child) step_pipe.append((step_pipe_pi, step_lock)) child.start() for i_policies in range(threads_gpu): child_name = f"policy_{i_policies}" worker_cfg = ParamDict({ "seed": self.seed + 2048 + i_policies, "gpu": gpu }) child = Process(target=Agent_async._policy_worker, name=child_name, args=(worker_cfg, param_pipe_child, step_pipe, self._policy_lock, self._sync_signal, deepcopy(policy))) self._policy_proc.append(child) child.start() sleep(5)
def __init__(self, config, share_batches=True, manager=None, new_process=True):
    if new_process == True and manager is None:
        manager = Manager()
    # > 0 if we know if any are coming
    # == 0 if DatasetReader is processing a command
    self.knows = Semaphore(0)
    self.working = Semaphore(1 if new_process else 100)
    self.finished_reading = Lock()  # locked if we're still reading from file
    # number of molecules that have been sent to the pipe:
    self.in_pipe = Value('i', 0)
    # Tracking what's already been sent through the pipe:
    self._example_number = Value('i', 0)
    # The final kill switch:
    self._close = Value('i', 0)

    self.command_queue = manager.Queue(10)
    self.molecule_pipeline = None
    self.batch_queue = Queue(config.data.batch_queue_cap)  # manager.Queue(config.data.batch_queue_cap)
    self.share_batches = share_batches

    self.dataset_reader = DatasetReader("dataset_reader", self, config, new_process=new_process)
    if new_process:
        self.dataset_reader.start()
def __init__(self, data_path, sizes_filename, seq_length, initial_seed, max_epochs=100):
    # Input parameters.
    self.data_path = data_path
    self.sizes_filename = sizes_filename
    self.seq_length = seq_length
    self.initial_seed = initial_seed
    self.max_epochs = max_epochs

    # Lock for building the dataset.
    self.lock = Lock()

    # Shard stuff.
    # Dictionary from shard name to its size (number of elements).
    self.master_shard_size_dict = None
    # Dictionary from shard name to its size modified so it is
    # divisible by self.seq_length.
    self.shard_size_dict = None
    # Long array (self.max_epochs * num-shards) populated
    # randomly with shard names.
    self.shards_name = None
    # Start index of the data for each shard.
    self.shards_start_index = None
    self.build_shard_mappings_()
    self.data_length = self.shards_start_index[-1]

    # Data.
    self.shards_data = [None] * self.shards_name.size
    self.shards_sample_index = [None] * self.shards_name.size
def __init__(self, config: ParamDict, environment: Environment, policy: Policy, filter_op: Filter): threads, gpu = config.require("threads", "gpu") super(Agent_sync, self).__init__(config, environment, policy, filter_op) # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update self._sync_signal = Value('i', 0) # sampler sub-process list self._sampler_proc = [] # used for synchronize commands self._cmd_pipe = None self._param_pipe = None self._cmd_lock = Lock() cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True) param_pipe_child, param_pipe_parent = Pipe(duplex=False) self._cmd_pipe = cmd_pipe_parent self._param_pipe = param_pipe_parent for i_thread in range(threads): child_name = f"sampler_{i_thread}" worker_cfg = ParamDict({ "seed": self.seed + 1024 + i_thread, "gpu": gpu }) child = Process(target=Agent_sync._sampler_worker, name=child_name, args=(worker_cfg, cmd_pipe_child, param_pipe_child, self._cmd_lock, self._sync_signal, deepcopy(policy), deepcopy(environment), deepcopy(filter_op))) self._sampler_proc.append(child) child.start()
def per_step(valLoader, model, criterion, downsamplingFactor):
    model.eval()
    criterion.eval()
    avgPER = 0
    varPER = 0
    nItems = 0

    print("Starting the PER computation through beam search")
    bar = progressbar.ProgressBar(maxval=len(valLoader))
    bar.start()

    for index, data in enumerate(valLoader):
        bar.update(index)
        with torch.no_grad():
            seq, sizeSeq, phone, sizePhone = prepare_data(data)
            c_feature = model(seq)
            sizeSeq = sizeSeq / downsamplingFactor
            predictions = torch.nn.functional.softmax(
                criterion.getPrediction(c_feature), dim=2).cpu()
        phone = phone.cpu()
        sizeSeq = sizeSeq.cpu()
        sizePhone = sizePhone.cpu()

        mutex = Lock()
        manager = Manager()
        poolData = manager.list()
        processes = []
        for b in range(sizeSeq.size(0)):
            l_ = min(sizeSeq[b] // 4, predictions.size(1))
            s_ = sizePhone[b]
            p = torch.multiprocessing.Process(
                target=get_local_per,
                args=(poolData, mutex,
                      predictions[b, :l_].view(l_, -1).numpy(),
                      phone[b, :s_].view(-1).numpy().astype(np.int32),
                      criterion.BLANK_LABEL))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()

        avgPER += sum([x for x in poolData])
        varPER += sum([x * x for x in poolData])
        nItems += len(poolData)

    bar.finish()

    avgPER /= nItems
    varPER /= nItems
    varPER -= avgPER ** 2
    print(f"Average PER {avgPER}")
    print(f"Standard deviation PER {math.sqrt(varPER)}")
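# A hedged sketch of the shape of the worker invoked above (the repository's get_local_per
# is not shown here): whatever per-utterance score a process computes is appended to the
# shared manager list while holding the mutex, so results from concurrent workers cannot
# interleave. The scoring itself is elided; `score` stands in for the computed PER.
def append_result(pool_data, mutex, score):
    with mutex:
        pool_data.append(score)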
def init_data(self):
    self.is_working = False
    self.semaphore = True
    # whether the user has dragged the slider; default: False
    self.is_change_bar = Value(c_bool, False)
    self.frame_index = Value('i', 0)
    self.share_lock = Lock()   # shared lock for frame_index
    self.share_lock2 = Lock()  # second shared lock for frame_index
    self.mutex = threading.Lock()
    self.timer = QTimer(self)       # used for updating the progress bar
    self.temp_timer = QTimer(self)  # used for detecting whether frame_total has been set
    self.frame_total = Value('i', -1)
    self.playable = Value(c_bool, True)
    self.is_working = Value(c_bool, False)
    manager = Manager()
    self.play_src = manager.Value(c_char_p, '0')  # records the address of the video to play
    self.mode = None  # 'online' or 'offline'
def __init__(self, chk_dir, chk, keep_epoch_chk=True, overwrite=True,
             mode=CFMode.AUTO, chk_prefix='model_v_'):
    self.logger = logging.getLogger(__name__)
    self.chk_dir = chk_dir
    self.chk = chk
    self.keep_epoch_chk = keep_epoch_chk
    self.overwrite = overwrite
    self.chk_prefix = chk_prefix
    self.mode = mode
    self.chk_epoch_subdir = 'epoch'
    self.mp_manager = Manager()
    self.snapshot_copy = None
    self.cpu_side = False
    # Active snapshot; if true, don't snapshot again
    self.active_snapshot = Value('i', 0)
    self.lock = Lock()
    self.in_progress_snapshot = Value('i', 0)

    # Handle to the process performing the checkpoint.
    # There can be only one at any instant; a new checkpoint
    # cannot start unless the previous one completes.
    self.chk_process = None

    # `overwrite` supersedes if False
    if self.overwrite is False and self.keep_epoch_chk is False:
        self.keep_epoch_chk = True

    # Global ID of checkpoints being written.
    # Used to format the checkpoint path.
    # Instantiated from chk when restoring.
    self.chk_global_id = -1

    # Sorted list of available checkpoints (fnames)
    self.available_chk_iters = self.mp_manager.list()
    self.available_chk_epochs = self.mp_manager.list()
    self.initalize_chk_dir()

    self.logger.info("Available checkpoints : ")
    for item in self.available_chk_iters:
        self.logger.info(item)
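# A hedged sketch of the flag-under-lock pattern prepared above; this is not the original
# class's actual method, only an illustration. The check of in_progress_snapshot and its
# update happen atomically under self.lock, so two callers can never both believe they
# started the snapshot.
def _try_begin_snapshot(self):
    with self.lock:
        if self.in_progress_snapshot.value == 1:
            return False  # another snapshot is still being taken
        self.in_progress_snapshot.value = 1
        return True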
def __init__(self, log_directory: str, hyperparams: HyperParams):
    self.lock = Lock()
    self.episode_lengths = []
    self.episode_scores = []
    self.episode_values = []
    self.frame_counter = None
    self.queue = Queue(100)
    self.log_directory = log_directory

    if not os.path.exists(log_directory + '/hyperparams.txt'):
        with open(log_directory + '/hyperparams.txt', 'w') as param_file:
            for parameter in hyperparams._fields:
                value = getattr(hyperparams, parameter)
                print(f'{parameter}: {value}', file=param_file)
def __init__(self, path, data_type='data', mem_map=False, map_fn=None):
    lazypath = get_lazy_path(path)
    datapath = os.path.join(lazypath, data_type)
    # get file where array entries are concatenated into one big string
    self._file = open(datapath, 'rb')
    self.file = self._file
    # memory map file if necessary
    self.mem_map = mem_map
    if self.mem_map:
        self.file = mmap.mmap(self.file.fileno(), 0, prot=mmap.PROT_READ)
    lenpath = os.path.join(lazypath, data_type + '.len.pkl')
    self.lens = pkl.load(open(lenpath, 'rb'))
    self.ends = list(accumulate(self.lens))
    self.dumb_ends = list(self.ends)
    self.read_lock = Lock()
    self.process_fn = map_fn
    self.map_fn = map_fn
    self._tokenizer = None
def __init__(self, path, data_type='data', mem_map=False, map_fn=None,
             is_array=False, array_data_type=np.int32):
    lazypath = get_lazy_path(path)
    datapath = os.path.join(lazypath, data_type)
    # get file where array entries are concatenated into one big string
    self._file = open(datapath, 'rb')
    self.file = self._file
    self.is_array = is_array
    self.array_data_type = array_data_type
    lenpath = os.path.join(lazypath, data_type + '.len.pkl')
    self.lens = pkl.load(open(lenpath, 'rb'))
    self.ends = list(accumulate(self.lens))
    self.dumb_ends = list(self.ends)
    # memory map file if necessary
    self.mem_map = mem_map
    if self.mem_map:
        if is_array:
            if self.ends[-1] == 0:
                self.file = np.array([], dtype=array_data_type)
            else:
                self.file = np.memmap(self.file, dtype=array_data_type, mode='r', order='C')
        else:
            if self.ends[-1] == 0:
                self.file = bytearray()
            else:
                self.file = mmap.mmap(self.file.fileno(), 0, prot=mmap.PROT_READ)
    self.read_lock = Lock()
    self.process_fn = map_fn
    self.map_fn = map_fn
    self._tokenizer = None
    self.is_lazy = True
def run_sac(env, obs_state, num_actions, hyperps, device=torch.device("cpu"), render=True):
    mp.set_start_method('spawn')
    print(torch.multiprocessing.get_start_method())

    shared_model = ActorSimpleMem(412, 2).share_memory().to(device)
    shared_optimizer = SharedAdam(shared_model.parameters(), lr=hyperps['q_lr'])
    lock = Lock()
    shared_buffer = BasicBuffer(hyperps['maxmem'])
    processes = []

    summary_model = ActorSimpleMem(412, 2)
    summary_model.load_state_dict(shared_model.state_dict())
    writer.add_graph(summary_model, [(torch.randn(64, 412), torch.randn(64, 256))])

    print('Spawning training processes')
    for rank in range(3):
        p = mp.Process(target=train_on_env,
                       args=(env, rank, lock, shared_model, shared_optimizer, obs_state,
                             num_actions, hyperps, shared_buffer, device, render))
        time.sleep(10)
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
    # train_on_env(env, rank, lock, shared_model, shared_optimizer, obs_state,
    #              num_actions, hyperps, shared_buffer, device, render)

    writer.close()
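# A minimal, hypothetical sketch of the locking pattern the workers above are expected to
# follow: each process computes its loss locally and takes the shared optimizer step while
# holding the lock, so concurrent updates to the shared model do not interleave. This
# illustrates the pattern only; the project's train_on_env may differ.
def locked_update(lock, shared_optimizer, loss):
    with lock:
        shared_optimizer.zero_grad()
        loss.backward()
        shared_optimizer.step()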
def main():
    args = parse_args()
    categories = parse_categories(parse_data(args.data)['names'])
    cap = cv2.VideoCapture(0)
    frame_queue = Queue()
    preds_queue = Queue()
    cur_dets = None
    frame_lock = Lock()
    proc = Process(target=detect, args=(frame_queue, preds_queue, frame_lock, args))
    proc.start()
    try:
        while True:
            ret, frame = cap.read()
            frame_lock.acquire()
            # keep only the most recent frame in the queue
            while not frame_queue.empty():
                frame_queue.get()
            frame_queue.put(frame)
            frame_lock.release()
            if not preds_queue.empty():
                cur_dets = preds_queue.get()
            if cur_dets is not None and len(cur_dets) > 0:
                frame = draw_detections_opencv(frame, cur_dets[0], categories)
            cv2.imshow('frame', frame)
            cv2.waitKey(1)
    except KeyboardInterrupt:
        print('Interrupted')
        proc.join()
        cap.release()
        cv2.destroyAllWindows()
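# A hypothetical sketch of the consumer side of the latest-frame pattern above (the
# project's detect() is not shown): taking the same lock while draining the queue means
# the producer cannot swap frames mid-read, so the worker always sees the newest frame.
def latest_frame(frame_queue, frame_lock):
    with frame_lock:
        frame = None
        while not frame_queue.empty():
            frame = frame_queue.get()
    return frame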
def __init__(self, bot_id, cfg, task_factory, encoder, Actor, Critic, goal_encoder):
    self.cfg = cfg
    self.bot = Bot(
        cfg, bot_id,
        encoder, goal_encoder,
        Actor, Critic,
        task_factory.state_size,
        task_factory.action_size,
        task_factory.wrap_action,
        task_factory.wrap_value)
    self.bot.share_memory()  # !! must be done from main process !!

    self.iter = 0
    self.freezed = 0
    self.counter = 1
    self.tau = LinearSchedule(cfg['tau_replay_counter'], cfg['tau_base'], cfg['tau_final'])

    self.lock = Lock()
    self.bot = BotProxy(self.lock, cfg, self.bot, cfg['device'])
def __init__(self, cache_dir, dataset_dir, dataset_list, cuda,
             batch_size=500, num_workers=3, renew_frequency=5,
             rejection_radius_position=0, numpatches=900, numneg=3,
             pos_thr=50.0, reject=True, mode='train', rejection_radius=3000,
             dist_type='3D', patch_radius=None, use_depth=False,
             use_normals=False, use_silhouettes=False, color_jitter=False,
             greyscale=False, maxres=4096, scale_jitter=False,
             photo_jitter=False, uniform_negatives=False, needles=0,
             render_only=False, maxitems=200, cache_once=False):
    super(MultimodalPatchesCache, self).__init__()
    self.cache_dir = cache_dir
    self.dataset_dir = dataset_dir
    # self.images_path = images_path
    self.dataset_list = dataset_list
    self.cuda = cuda
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.renew_frequency = renew_frequency
    self.rejection_radius_position = rejection_radius_position
    self.numpatches = numpatches
    self.numneg = numneg
    self.pos_thr = pos_thr
    self.reject = reject
    self.mode = mode
    self.rejection_radius = rejection_radius
    self.dist_type = dist_type
    self.patch_radius = patch_radius
    self.use_depth = use_depth
    self.use_normals = use_normals
    self.use_silhouettes = use_silhouettes
    self.color_jitter = color_jitter
    self.greyscale = greyscale
    self.maxres = maxres
    self.scale_jitter = scale_jitter
    self.photo_jitter = photo_jitter
    self.uniform_negatives = uniform_negatives
    self.needles = needles
    self.render_only = render_only

    self.cache_done_lock = Lock()
    self.all_done = Value('B', 0)    # 0 is False
    self.cache_done = Value('B', 0)  # 0 is False

    self.wait_for_cache_builder = Event()
    # prepare to wait until the initial cache is built
    self.wait_for_cache_builder.clear()
    self.cache_builder_resume = Event()

    self.maxitems = maxitems
    self.cache_once = cache_once
    if self.mode == 'eval':
        self.maxitems = -1

    self.cache_builder = Process(target=self.buildCache, args=[self.maxitems])
    self.current_cache_build = Value('B', 0)  # 0th cache
    self.current_cache_use = Value('B', 1)    # 1st cache
    self.cache_names = ["cache1", "cache2"]   # constant

    rebuild_cache = True
    if self.mode == 'eval':
        validation_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_build.value])
        if os.path.isdir(validation_dir):
            # we don't need to rebuild the validation cache
            # TODO: check if cache is VALID
            rebuild_cache = False
    elif cache_once:
        build_dataset_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_build.value])
        if os.path.isdir(build_dataset_dir):
            # we don't need to rebuild the training cache if we are training
            # on a limited subset of the training set
            rebuild_cache = False

    if rebuild_cache:
        # clear the caches if they already exist
        build_dataset_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_build.value])
        if os.path.isdir(build_dataset_dir):
            shutil.rmtree(build_dataset_dir)
        use_dataset_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_use.value])
        if os.path.isdir(use_dataset_dir):
            shutil.rmtree(use_dataset_dir)
        os.makedirs(build_dataset_dir)

    self.cache_builder_resume.set()
    self.cache_builder.start()

    # wait until the initial cache is built; the builder resumes us
    self.wait_for_cache_builder.wait()

    if self.mode != 'eval' and (not self.cache_once):
        # for training, set up the cache builder to build the second cache
        self.restart()
    else:
        # for validation we don't need a second cache; just switch the built
        # cache to the use cache in order to use it
        tmp = self.current_cache_build.value
        self.current_cache_build.value = self.current_cache_use.value
        self.current_cache_use.value = tmp
else:
    playouts = []

# Load the replay memory
replay_memory = ReplayMemory(16384)
for playout in playouts:
    for move_datapoint in playout:
        replay_memory.push(move_datapoint)

# Setup the network
board_size = len(playouts[0][0].board_list)
net = Resnet(2, board_size).to(DEVICE)
if args.init_model is not None:
    net.Load(args.init_model)

mem_lock = Lock()
param_queue = None
server = None
shutdown_val = None
receiver_proc = None
if args.server_config is not None:
    # Setup the handling of workers and the parameter server
    with open(args.server_config) as server_file:
        config = json.load(server_file)
    if ('addr' not in config) or ('port' not in config):
        print("IP address (addr) and port number required in config")
    address = config['addr']
    port = int(config['port'])
entropy_layout = dict(title="Entropies",
                      xaxis={'title': 'n-step iter'},
                      yaxis={'title': 'entropy'})

MAX_EPISODES = 2000
DISCOUNT_FACTOR = 0.99
STEPS = 10

GlobalModel = Model()
GlobalModel.share_memory()
CriticOptimizer = torch.optim.Adam(GlobalModel.CriticParameters(), lr=0.01)
ActorOptimizer = torch.optim.Adam(GlobalModel.ActorParameters(), lr=0.001)
Optimizer = torch.optim.Adam(GlobalModel.parameters(), lr=0.001)
lock = Lock()

num_cpu = 4
agents = []
for cpu in range(num_cpu):
    agents.append(Agent(cpu))

receiver, sender = Pipe()
agent_threads = []
for agent in agents:
    thread = Process(target=agent.letsgo, args=(
        GlobalModel, CriticOptimizer, ActorOptimizer,
'''
Maps episode length to a dictionary with the following keys:
    current_idx: which episode in the list we are at (if simply indexing into the list)
    ep_list: list of episodes of the length of the key
    bucket_complete: if there are no more episodes left to consider in the bucket
'''
# Maps episode length to list of episodes
length_to_eps = {}
# List of batches if popping batches
batches = []
# If all episodes have been loaded into memory
load_complete = Value(ctypes.c_bool, False)
# Lock to access batches
batches_lock = Lock()
# Lock to access length_to_eps
cache_lock = Lock()
# Lock for condition variables
fill_cache_lock = RLock()
# Condition notifying Loader to add to cache
add_to_cache_cv = Condition(lock=fill_cache_lock)
# Condition notifying teacher that cache has episodes
cache_filled_cv = Condition(lock=fill_cache_lock)


def batch_cache(function):
    max_cache_size = 10000  # Max unseen eps
    min_cache_size = 1000   # Min unseen eps

    def get_cache_size():
'''
Maps episode length to a dictionary with the following keys:
    current_idx: which episode in the list we are at (if simply indexing into the list)
    ep_list: list of episodes of the length of the key
    bucket_complete: if there are no more episodes left to consider in the bucket
'''
length_to_eps = {}  # Maps episode length to list of episodes
batches = []  # List of batches if popping batches
load_complete = Value(ctypes.c_bool, False)  # If all episodes have been loaded into memory
batches_lock = Lock()  # Lock to access batches
cache_lock = Lock()  # Lock to access length_to_eps
fill_cache_lock = RLock()  # Lock for condition variables
add_to_cache_cv = Condition(lock=fill_cache_lock)  # Condition notifying Loader to add to cache
cache_filled_cv = Condition(lock=fill_cache_lock)  # Condition notifying teacher that cache has episodes


def batch_cache(function):
    max_cache_size = 10000  # Max unseen eps
    min_cache_size = 1000   # Min unseen eps

    def get_cache_size():
        '''Returns the number of available episodes.'''
        return sum(len(v['ep_list']) - v['current_idx']
                   for k, v in length_to_eps.items())
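# For illustration only (not part of the original module): one bucket of length_to_eps,
# keyed by episode length, with the fields described in the docstring above.
length_to_eps[5] = {
    'current_idx': 0,          # next episode to hand out from this bucket
    'ep_list': [],             # episodes of length 5
    'bucket_complete': False,  # True once no more length-5 episodes will be added
}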
def crop_face(args):
    for k, v in default_args.items():
        setattr(args, k, v)
    assert osp.exists(args.data_dir), "The input dir does not exist"
    root_folder_name = args.data_dir.split('/')[-1]
    src_folder = args.data_dir
    dst_folder = args.data_dir.replace(root_folder_name, root_folder_name + '_OPPOFaces')
    lz.mkdir_p(dst_folder, delete=False)
    ds = TestData(src_folder)
    loader = torch.utils.data.DataLoader(ds, batch_size=args.batch_size,
                                         num_workers=args.num_workers,
                                         shuffle=False,
                                         pin_memory=True,
                                         drop_last=False)
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12 (pose) + 40 (shape) + 10 (expression)
    model_dict = model.state_dict()
    # because the model was trained on multiple GPUs, the 'module.' prefix must be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load dlib model for face detection and landmarks used for face cropping
    queue = Queue()
    lock = Lock()
    consumers = []
    for i in range(args.num_consumers):
        p = Process(target=consumer, args=(queue, lock))
        p.daemon = True
        consumers.append(p)
    for c in consumers:
        c.start()

    # 3. forward
    ttl_nimgs = 0
    ttl_imgs = []
    data_meter = lz.AverageMeter()
    model_meter = lz.AverageMeter()
    post_meter = lz.AverageMeter()
    lz.timer.since_last_check('start crop face')
    for ind, data in enumerate(loader):
        data_meter.update(lz.timer.since_last_check(verbose=False))
        if (data['finish'] == 1).all().item():
            logging.info('finish')
            break
        if ind % 10 == 0:
            logging.info(
                f'proc batch {ind}, data time: {data_meter.avg:.2f}, '
                f'model: {model_meter.avg:.2f}, post: {post_meter.avg:.2f}')
        mask = data['finish'] == 0
        input = data['img'][mask]
        input_np = input.numpy()
        roi_box = data['roi_box'][mask].numpy()
        imgfn = np.asarray(data['imgfn'])[mask.numpy().astype(bool)]
        dst_imgfn = [img_fp.replace(root_folder_name, root_folder_name + '_OPPOFaces')
                     for img_fp in imgfn]
        ttl_imgs.extend(dst_imgfn)
        ttl_nimgs += mask.sum().item()
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().astype(np.float32)
        model_meter.update(lz.timer.since_last_check(verbose=False))
        queue.put((imgfn, param, roi_box, dst_imgfn))
        # pts68 = [predict_68pts(param[i], roi_box[i]) for i in range(param.shape[0])]
        # pts68_proc = [predict_68pts(param[i], [0, 0, STD_SIZE, STD_SIZE]) for i in range(param.shape[0])]
        # for img_fp, pts68_, pts68_proc_, img_, dst in zip(imgfn, pts68, pts68_proc, input_np, dst_imgfn):
        #     ## this may need opt to async read write
        #     img_ori = cvb.read_img(img_fp)
        #     pts5 = to_landmark5(pts68_[:2, :].transpose())
        #     warped = preprocess(img_ori, landmark=pts5)
        #     # plt_imshow(warped, inp_mode='bgr'); plt.show()
        #     lz.mkdir_p(osp.dirname(dst), delete=False)
        #     cvb.write_img(warped, dst)
        #
        #     ## this may cause black margin
        #     # pts5 = to_landmark5(pts68_proc_[:2, :].transpose())
        #     # warped = preprocess(to_img(img_), landmark=pts5)
        #     # # plt_imshow(warped, inp_mode='bgr'); plt.show()
        #     # dst = img_fp.replace(root_folder_name, root_folder_name + '_OPPOFaces')
        #     # cvb.write_img(warped, dst)
        #     if args.dump_res:
        #         img_ori = cvb.read_img(img_fp)
        #         pts_res = [pts68_]
        #         dst = img_fp.replace(root_folder_name, root_folder_name + '_kpts.demo')
        #         lz.mkdir_p(osp.dirname(dst), delete=False)
        #         draw_landmarks(img_ori, pts_res,
        #                        wfp=dst,
        #                        show_flg=args.show_flg)
        post_meter.update(lz.timer.since_last_check(verbose=False))
    lz.msgpack_dump(ttl_imgs, dst_folder + '/' + 'all_imgs.pk')
    del model, input
    torch.cuda.empty_cache()
    while not queue.empty():
        time.sleep(1)
def sim_games(N_games, N_MCTS, model, number_of_processes, v_resign,
              model2=None, duel=False, batch_size=8, board_size=9):
    #### Function for generating games
    print("Starting sim games")
    process_workers = []
    torch.multiprocessing.set_start_method('spawn', force=True)

    # Make queues for sending data
    gpu_Q = Queue()
    if duel == False:
        data_Q = Queue()
        # Also make a pipe for receiving v_resign
        conn_rec, conn_send = Pipe(False)
        p_data = Process(target=data_handler, args=(data_Q, N_games, conn_send))
        process_workers.append(p_data)
    else:
        winner_Q = Queue()
        gpu_Q2 = Queue()
        process_workers.append(Process(target=gpu_worker,
                                       args=(gpu_Q2, batch_size, board_size, model2)))

    # Make counter and lock
    game_counter = Value('i', 0)
    lock = Lock()

    # Make process for gpu worker and data_loader
    process_workers.append(Process(target=gpu_worker,
                                   args=(gpu_Q, batch_size, board_size, model)))

    # Start gpu and data_loader workers
    print("GPU processes")
    for p in process_workers:
        p.start()

    # Construct tasks for workers
    procs = []
    torch.multiprocessing.set_start_method('fork', force=True)
    print("defining worker processes")
    for i in range(number_of_processes):
        seed = np.random.randint(int(2 ** 31))
        if duel == True:
            procs.append(Process(target=sim_duel_game_worker,
                                 args=(gpu_Q, gpu_Q2, N_MCTS, winner_Q, N_games,
                                       lock, game_counter, seed)))
        else:
            procs.append(Process(target=sim_game_worker,
                                 args=(gpu_Q, N_MCTS, data_Q, v_resign, N_games,
                                       lock, game_counter, seed)))

    print("Starting worker processes")
    # Begin running games
    for p in procs:
        p.start()

    # Join processes
    if duel == False:
        # Receive new v_resign
        v_resign = conn_rec.recv()
    else:
        player1_wins = 0
        player2_wins = 0
        for i in range(N_games):
            player1_won = winner_Q.get(True)
            if player1_won == 1:
                player1_wins += 1
            else:
                player2_wins += 1

    for p in procs:
        p.join()

    # Close processes
    for p in process_workers:
        p.terminate()

    # Returns v_resign if training, else winrate when dueling
    if duel == False:
        return v_resign
    else:
        return player1_wins, player2_wins
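# A hypothetical sketch (not the project's sim_game_worker) of how the shared counter and
# lock passed above are commonly used: each worker claims the next game index atomically,
# stopping once N_games have been claimed across all processes.
def claim_next_game(lock, game_counter, n_games):
    with lock:
        if game_counter.value >= n_games:
            return None  # all games have been claimed
        game_counter.value += 1
        return game_counter.value - 1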
def __init__(self, val=0):
    self.val = Value('i', val)
    self.lock = Lock()
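# A sketch of how such a counter is commonly completed (hypothetical methods, not
# necessarily the original class body): the lock is held across the read-modify-write
# so concurrent increments from different processes are not lost.
def increment(self, n=1):
    with self.lock:
        self.val.value += n

def value(self):
    with self.lock:
        return self.val.value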
def train():
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    writer = SummaryWriter()

    s2 = S2(latent_num, cnn_chanel_num, stat_dim).to(device).share_memory()
    writer.add_graph(
        s2, (torch.zeros([1, 1, img_shape[0], img_shape[1]]).to(device),
             torch.zeros([1, stat_dim]).to(device)))
    optim = GlobalAdam([
        {'params': s2.encode_img.parameters()},
        {'params': s2.encode_stat.parameters()},
        {'params': s2.pi.parameters()},
        {'params': s2.actor.parameters()},
    ], lr=1e-2, weight_decay=0.01)
    if os.path.exists('S2_state_dict.pt'):
        s2.load_state_dict(torch.load('S2_state_dict.pt'))
        optim.load_state_dict(torch.load('S2_Optim_state_dict.pt'))

    pair_queue = Queue(10000)
    validate_queue = Queue()
    optimizer_lock = Lock()
    process = []
    data_list = [
        'A8888.XDCE', 'AL8888.XSGE', 'AU8888.XSGE', 'C8888.XDCE',
        'M8888.XDCE', 'RU8888.XSGE', 'SR8888.XZCE'
    ]
    for no in range(mp.cpu_count() - 1):
        data = pd.read_csv(f"../data/{data_list[no]}_5m.csv")
        worker = Worker_Generator(no, data, pair_queue)
        worker.start()
        process.append(worker)
    validater = Validate(s2, optimizer_lock, validate_queue)
    validater.start()

    epochs = 0
    while True:
        imgs = []
        stats = []
        cates = []
        seen = 0
        while seen < minibatch:
            img, stat, cate = pair_queue.get()
            imgs.append(img)
            stats.append(stat)
            cates.append(cate)
            seen += 1
        imgs = torch.tensor(imgs).float().to(device)
        stats = torch.tensor(stats).float().to(device)
        g_t = torch.tensor(cates).long().to(device)

        pred = s2(imgs, stats)
        loss = F.cross_entropy(pred, g_t)
        accr = (pred.argmax(1) == g_t).sum().item() / minibatch
        with optimizer_lock:
            optim.zero_grad()
            loss.backward()
            optim.step()

        if not validate_queue.empty():
            val_reward, val_money, val_win = validate_queue.get()
            writer.add_scalar('Validate/reward', val_reward, epochs)
            writer.add_scalar('Validate/money', val_money, epochs)
            writer.add_scalar('Validate/win_rate', val_win, epochs)
        writer.add_scalar('Train/Loss', loss.item(), epochs)
        writer.add_scalar('Train/Accr', accr, epochs)
        epochs += 1
        if epochs % save_every == 0:
            torch.save(s2.state_dict(), 'S2_state_dict.pt')
            torch.save(optim.state_dict(), 'S2_Optim_state_dict.pt')

    for worker in process:
        worker.join()
import fasttext
import functools
import numpy as np
import pytext.utils.cuda_utils as cuda_utils

from abc import abstractmethod
from collections import OrderedDict
from sentencepiece import SentencePieceProcessor
from typing import List

from pytorch_pretrained_bert import BertModel, BertTokenizer
from pytext.config.field_config import ConfigBase
from torch.multiprocessing import Lock

MODEL_DOWNLOAD_LOCK = Lock()


def run_model(model, inputs, layer):
    layers, _ = model(inputs)
    return layers[layer].cpu()


class EmbedderInterface:
    __REGISTRY = dict()

    class Config(ConfigBase):
        max_pieces: int = -1
        preproc_dir: str = "."
        use_cuda_if_available: bool = True
        embed_type: str = "BERTEmbed"
def get_best_model(
        log_path: str,
        mss: ModelSearchSet,
        train_data: Dataset,
        validn_data: Dataset,
        test_data: Dataset,
        get_dataset_kwargs: Dict[str, Any],
        exp_dir: str = '../out',
        devices_info: List[Tuple[int, int]] = [(-1, 1)],  # (device_id, max_proc_cnt), -1 for cpu
        show_hpsearch_stats: bool = True):
    logger = Logger(log_path)
    if LOG_DATASET_STATS:
        logger.log(train_data.get_stats(title='Train Data'))
        logger.log(validn_data.get_stats(title='Validation Data'))
        logger.log(test_data.get_stats(title='Test Data'))

    # Prepare process-specific device ids, hyperparams and datasets
    proc_device_id: List[int]
    nprocs: int

    # Prep proc_device_id
    proc_device_id = []
    device_ids_helper = {}
    for info in devices_info:
        device_ids_helper[info[0]] = info[1]
    rem = True
    while rem:
        rem = False
        for device_id in device_ids_helper:
            if device_ids_helper[device_id] > 0:
                rem = True
                proc_device_id.append(device_id)
                device_ids_helper[device_id] -= 1

    # Prep proc_hps
    proc_hps = []
    if mss.model_class not in CPU_MODELS:
        total_proc_cnt = len(proc_device_id)
        proc_hp_cnt = np.full(total_proc_cnt, len(mss.hps) // total_proc_cnt)
        proc_hp_cnt[:len(mss.hps) % total_proc_cnt] += 1
        agg = 0
        for cnt in proc_hp_cnt:
            if cnt > 0:
                proc_hps.append(mss.hps[agg:agg + cnt])
                agg += cnt
    else:
        proc_hps = [mss.hps]
    nprocs = len(proc_hps)
    proc_device_id = proc_device_id[:nprocs]

    # Run the processes
    l: 'multiprocessing.synchronize.Lock' = Lock()
    cmlock: 'multiprocessing.synchronize.Lock' = Lock()
    started_hps = Value('i', 0)
    started_hps_lk: 'multiprocessing.synchronize.Lock' = Lock()
    finished_hps = Value('i', 0)
    total_hps = len(mss.hps)
    start_time = time.time()

    assert (train_data.name == validn_data.name) and \
           (validn_data.name == test_data.name) and \
           (test_data.name == get_dataset_kwargs['dataset_name'])
    assert (train_data.shuffle_seed == validn_data.shuffle_seed) and \
           (validn_data.shuffle_seed == test_data.shuffle_seed) and \
           (test_data.shuffle_seed == get_dataset_kwargs['shuffle_seed'])

    model_search_summary_path = '{}/{}/{}-search-summary{}{}.csv'.format(
        exp_dir, EXP_LOGS_DIR, get_dataset_kwargs['dataset_name'],
        DESC_SEP, exp_dir.split(DESC_SEP)[-1])
    assert not os.path.exists(model_search_summary_path)

    logger.log(f'\nExperiment dir: {os.path.abspath(exp_dir)}\n')
    logger.log(f'Hyperparam configs: {total_hps}\n')
    logger.log(f'Number of processes: {nprocs}\n')

    args = (l, cmlock, started_hps, started_hps_lk, finished_hps, total_hps,
            start_time, mss.model_class, proc_hps, train_data, validn_data,
            test_data, get_dataset_kwargs, exp_dir, proc_device_id, log_path,
            model_search_summary_path, mss.use_lforb)
    if mss.model_class not in CPU_MODELS:
        spawn(get_best_model_aux, args=args, nprocs=nprocs)
    else:
        get_best_model_aux(0, *args)

    if show_hpsearch_stats:
        hpsearch_stats(model_search_summary_path, get_dataset_kwargs['dataset_name'])

    logger.log('\n==================================\n')
    logger.log('Model search summary saved to: {}\n'.format(
        os.path.abspath(model_search_summary_path)))
    logger.log('==================================\n\n')
    logger.close()
def train():
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    writer = SummaryWriter()

    ac = AC(latent_num, cnn_chanel_num, stat_dim)
    writer.add_graph(ac, (torch.zeros([1, 1, img_shape[0], img_shape[1]]),
                          torch.zeros([1, stat_dim])))
    optim = GlobalAdam([
        {'params': ac.encode_img.parameters(), 'lr': 2.5e-5},
        {'params': ac.encode_stat.parameters(), 'lr': 2.5e-5},
        {'params': ac.pi.parameters(), 'lr': 2.5e-5},
        {'params': ac.actor.parameters(), 'lr': 2.5e-5},
        {'params': ac.f.parameters()},
        {'params': ac.V.parameters()},
    ], lr=5e-3, weight_decay=weight_decay)
    if os.path.exists('S3_state_dict.pt'):
        ac.load_state_dict(torch.load('S3_state_dict.pt'))
        optim.load_state_dict(torch.load('S3_Optim_state_dict.pt'))
    else:
        ac.load_state_dict(torch.load('../stage2/S2_state_dict.pt'), strict=False)

    result_queue = Queue()
    validate_queue = Queue()
    gradient_queue = Queue()
    loss_queue = Queue()
    ep_cnt = Value('i', 0)
    optimizer_lock = Lock()
    processes = []

    ac.share_memory()
    optimizer_worker = Process(target=update_shared_model,
                               args=(gradient_queue, optimizer_lock, optim, ac))
    optimizer_worker.start()
    for no in range(mp.cpu_count() - 3):
        worker = Worker(no, ac, ep_cnt, optimizer_lock, result_queue,
                        gradient_queue, loss_queue)
        worker.start()
        processes.append(worker)
    validater = Validate(ac, ep_cnt, optimizer_lock, validate_queue)
    validater.start()

    best_reward = 0
    while True:
        with ep_cnt.get_lock():
            if not result_queue.empty():
                ep_cnt.value += 1
                reward, money, win_rate = result_queue.get()
                objective_actor, loss_critic, loss_f = loss_queue.get()
                writer.add_scalar('Interaction/Reward', reward, ep_cnt.value)
                writer.add_scalar('Interaction/Money', money, ep_cnt.value)
                writer.add_scalar('Interaction/win_rate', win_rate, ep_cnt.value)
                writer.add_scalar('Update/objective_actor', objective_actor, ep_cnt.value)
                writer.add_scalar('Update/loss_critic', loss_critic, ep_cnt.value)
                writer.add_scalar('Update/loss_f', loss_f, ep_cnt.value)
                with optimizer_lock:
                    if reward > best_reward:
                        best_reward = reward
                        torch.save(ac.state_dict(), 'S3_BEST_state_dict.pt')
                    if ep_cnt.value % save_every == 0:
                        torch.save(ac.state_dict(), 'S3_state_dict.pt')
                        torch.save(optim.state_dict(), 'S3_Optim_state_dict.pt')
        if not validate_queue.empty():
            val_reward, val_money, val_win_rate = validate_queue.get()
            writer.add_scalar('Validation/reward', val_reward, ep_cnt.value)
            writer.add_scalar('Validation/money', val_money, ep_cnt.value)
            writer.add_scalar('Validation/win_rate', val_win_rate, ep_cnt.value)

    for worker in processes:
        worker.join()
    optimizer_worker.kill()
def __init__(self):
    self.value = Value('i', 0)
    self.lock = Lock()
    self.local = 0