def start_policy_worker(inputs):
    sw = stopwatch.StopWatch()
    args, experiment_name, i, lock, stats_queue, device, \
        next_obs, next_done, obs, actions, logprobs, rewards, dones, values, traj_availables, \
        rollout_task_queues, policy_request_queue, learner_request_queue = inputs
    device = torch.device('cuda')
    agent = Agent(4).to(device)
    min_num_requests = 6
    wait_for_min_requests = 0.025
    # time.sleep(5)
    step = 0
    while True:
        step += 1
        with sw.timer('policy_worker'):
            waiting_started = time.time()
            policy_requests = []
            with sw.timer('policy_requests'):
                # Batch incoming requests: wait until enough have arrived
                # or the deadline passes, then run one batched inference.
                while len(policy_requests) < min_num_requests and \
                        time.time() - waiting_started < wait_for_min_requests:
                    try:
                        policy_requests.extend(policy_request_queue.get_many(timeout=0.005))
                    except Empty:
                        pass
            if len(policy_requests) == 0:
                continue
            with sw.timer('prepare_data'):
                ls = np.concatenate(policy_requests)
                rollout_worker_idxs = ls.T[0, ::args.num_envs // args.num_env_split]
                split_idxs = ls.T[1, ::args.num_envs // args.num_env_split]
                step_idxs = ls.T[-1, ::args.num_envs // args.num_env_split]
                idxs = tuple(ls.T)
            # The host-to-device pipeline is timed stage by stage below.
            with sw.timer('index'):
                t1 = next_obs[idxs[:-1]]
            with sw.timer('create array'):
                t2 = torch.from_numpy(next_obs[idxs[:-1]])
            with sw.timer('convert float'):
                t3 = t2.float()
            with sw.timer('move_to_gpu'):
                next_o = t3.to(device)
            # next_o = torch.from_numpy(next_obs[idxs[:-1]]).float().to(device)
            with sw.timer('inference'):
                with torch.no_grad():
                    a, l, e = agent.get_action(next_o)
            with sw.timer('move_to_cpu'):
                actions[idxs] = a.cpu()
            with sw.timer('execute_action'):
                for j in range(len(rollout_worker_idxs)):
                    rollout_worker_idx = rollout_worker_idxs[j]
                    split_idx = split_idxs[j]
                    step_idx = step_idxs[j]
                    rollout_task_queues[rollout_worker_idx].put([split_idx, step_idx])
            if step % 1000 == 0:
                print(ls.shape)
                print(stopwatch.format_report(sw.get_last_aggregated_report()))
def benchmark_find_local_nodes_impact():
    sw = stopwatch.StopWatch()
    with sw.timer('connection_speed'):
        for i in range(100):
            with sw.timer('default'):
                Node().default()
        # for i in range(100):
        #     with sw.timer('preselected'):
        #         Node().default2()
    print(stopwatch.format_report(sw.get_last_aggregated_report()))
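
# A minimal, self-contained sketch of the timing pattern every snippet here
# relies on, assuming the same `stopwatch` package as above: nested
# `sw.timer(...)` context managers feed a single aggregated report. The timer
# names and the sleep are illustrative only.
import time
import stopwatch

sw = stopwatch.StopWatch()
with sw.timer('root'):
    for _ in range(3):
        with sw.timer('child'):
            time.sleep(0.01)  # stand-in for real work being measured
# format_report renders the most recently aggregated trace as text.
print(stopwatch.format_report(sw.get_last_aggregated_report()))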
def start_rollout_worker(self, rollout_worker_idx, env_fns):
    sw = stopwatch.StopWatch()
    next_obs, next_done, obs, actions, logprobs, rewards, dones, values = self.storage
    env_idxs = range(
        rollout_worker_idx * self.num_envs_per_rollout_worker,
        rollout_worker_idx * self.num_envs_per_rollout_worker + self.num_envs_per_rollout_worker)
    envs = [None for _ in range(len(self.env_fns))]
    for env_idx in env_idxs:
        envs[env_idx] = self.env_fns[env_idx]()
        next_step = 0
        self.policy_request_queue.put([next_step, env_idx, rollout_worker_idx])
        next_obs[env_idx] = torch.tensor(envs[env_idx].reset())
        next_done[env_idx] = 0
    local_step = 0
    while True:
        with sw.timer('act'):
            with sw.timer('wait_rollout_task_queue'):
                tasks = self.rollout_task_queues[rollout_worker_idx].get_many()
            with sw.timer('rollouts'):
                for task in tasks:
                    step, env_idx = task
                    obs[step, env_idx] = next_obs[env_idx].copy()
                    dones[step, env_idx] = next_done[env_idx].copy()
                    next_obs[env_idx], r, d, info = envs[env_idx].step(actions[step, env_idx])
                    if d:
                        next_obs[env_idx] = envs[env_idx].reset()
                    rewards[step, env_idx] = r
                    next_done[env_idx] = d
                    next_step = step + 1
                    local_step += 1
                    with sw.timer('logging'):
                        self.policy_request_queue.put([next_step, env_idx, rollout_worker_idx])
                        if 'episode' in info.keys():
                            # print(["charts/episode_reward", info['episode']['r']])
                            # self.stats_queue.put(['l', info['episode']['l']])
                            self.stats_queue.put(["charts/episode_reward", info['episode']['r']])
        if local_step % 1000 == 0:
            print(stopwatch.format_report(sw.get_last_aggregated_report()))
            print()
def start_policy_worker(inputs):
    # raise
    args, experiment_name, i, lock, stats_queue, device, \
        next_obs, next_done, obs, actions, logprobs, rewards, dones, values, traj_availables, \
        rollout_task_queues, policy_request_queue, learner_request_queue = inputs
    data_loader = torch.utils.data.DataLoader(
        PolicyWorkerDataset(policy_request_queue, obs),
        batch_size=200)  # , num_workers=2, pin_memory=True
    sw = stopwatch.StopWatch()
    device = torch.device('cuda')
    agent = Agent(4).to(device)
    min_num_requests = 3  # unused in this variant; batching is delegated to the DataLoader
    wait_for_min_requests = 0.01
    # time.sleep(5)
    step = 0
    for batch_idx, (ls, next_o) in enumerate(data_loader):
        step += 1
        with sw.timer('policy_worker'):
            with sw.timer('create array at gpu'):
                next_o = next_o.to(device, non_blocking=True)
            with sw.timer('prepare_data'):
                ls = ls.numpy()
                rollout_worker_idxs = ls.T[0, ::args.num_envs // args.num_env_split]
                split_idxs = ls.T[1, ::args.num_envs // args.num_env_split]
                step_idxs = ls.T[-1, ::args.num_envs // args.num_env_split]
                idxs = tuple(ls.T)
            with sw.timer('inference'):
                with torch.no_grad():
                    a, l, e = agent.get_action(next_o)
            with sw.timer('move_to_cpu'):
                actions[idxs] = a.cpu()
            with sw.timer('execute_action'):
                for j in range(len(rollout_worker_idxs)):
                    rollout_worker_idx = rollout_worker_idxs[j]
                    split_idx = split_idxs[j]
                    step_idx = step_idxs[j]
                    rollout_task_queues[rollout_worker_idx].put([split_idx, step_idx])
                # Alternative considered: one put per request, timed individually.
                # for idx, item in enumerate(idxs):
                #     rollout_worker_idx, split_idx, step_idx = item[0], item[1], item[2]
                #     with sw.timer('put_action'):
                #         rollout_task_queues[rollout_worker_idx].put([split_idx, step_idx])
            if step % 100 == 0:
                # print(ls.shape)
                print(stopwatch.format_report(sw.get_last_aggregated_report()))
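
# PolicyWorkerDataset is referenced above but not defined in these snippets.
# Below is a plausible minimal sketch, assuming it streams (index-row,
# observation) pairs out of the request queue so the DataLoader can batch
# (and optionally pin) them. The class body is entirely hypothetical; the
# real implementation may differ.
import torch

class PolicyWorkerDataset(torch.utils.data.IterableDataset):
    def __init__(self, policy_request_queue, obs):
        self.policy_request_queue = policy_request_queue
        self.obs = obs  # shared observation buffer, indexed by request rows

    def __iter__(self):
        while True:
            # Each request is an index row; the last entry is the step index,
            # mirroring how idxs[:-1] selects observations in the workers above.
            for request in self.policy_request_queue.get_many():
                yield request, self.obs[tuple(request[:-1])]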
def test_format_report(self):
    sw = StopWatch()
    add_timers(sw)
    agg_report = sw.get_last_aggregated_report()
    formatted_report = format_report(agg_report)
    assert formatted_report == \
        "************************\n" \
        "*** StopWatch Report ***\n" \
        "************************\n" \
        "root 900000.000 (100%)\n" \
        " BUCKET_A child1 2 240000.000 (27%)\n" \
        " grand_children1 1 20000.000 (2%)\n" \
        " grand_children2 2 80000.000 (9%)\n" \
        " grand_children3 1 10000.000 (1%)\n" \
        " BUCKET_B child2 1 560000.000 (62%)\n" \
        " grand_children1 1 260000.000 (29%)\n" \
        " grand_children3 1 10000.000 (1%)\n" \
        "Tags: Cooltag, Slowtag"
def test_format_report(self):
    sw = StopWatch()
    add_timers(sw)
    agg_report = sw.get_last_aggregated_report()
    formatted_report = format_report(agg_report)
    assert formatted_report == \
        "root 900000.000ms (100%)\n" \
        " BUCKET_A child1 2 240000.000ms (27%)\n" \
        " grand_children1 1 20000.000ms (2%)\n" \
        " grand_children2 2 80000.000ms (9%)\n" \
        " grand_children3 1 10000.000ms (1%)\n" \
        " BUCKET_B child2 1 560000.000ms (62%)\n" \
        " grand_children1 1 260000.000ms (29%)\n" \
        " grand_children3 1 10000.000ms (1%)\n" \
        "Annotations: Cooltag, Slowtag"
    formatted_report2 = sw.format_last_report()
    assert formatted_report == formatted_report2
def run_pseudonymize_request():
    data = {"success": False}
    stats_dict = SqliteDict('./api_stats.sqlite', autocommit=True)
    output_types = ["pseudonymized", "tagged", "conll"]
    try:
        if not request.form.get("output_type"):
            logging.info("No tags were indicated. I will give you the text pseudonymized.")
            output_type = "pseudonymized"
        else:
            output_type = request.form.get("output_type")
            if output_type not in output_types:
                logging.warning("Your output type is not supported. I will give you the text pseudonymized.")
                output_type = "pseudonymized"
        if request.form.get("text"):
            text = request.form.get("text")
            logging.info("Tagging text with model...")
            # Predict and return a CoNLL string to send to the web demo app
            output, analysis_ner_stats = prepare_output(text=text, tagger=TAGGER, output_type=output_type)
            data["text"] = output
            data["success"] = True
        # stats_dict[:]
    except Exception as e:
        logger.error(e)
    finally:
        logger.info(stopwatch.format_report(sw.get_last_aggregated_report()))
        if data["success"]:
            update_stats(analysis_stats=stats_dict,
                         analysis_ner_stats=analysis_ner_stats,
                         time_info=sw.get_last_aggregated_report(),
                         output_type=output_type)
            logger.info(json.dumps(dict(stats_dict), indent=4))
        stats_dict.close()
    return jsonify(data)
def benchmark_steem_passtrough():
    sw = stopwatch.StopWatch()
    steem = Node().default()
    with sw.timer('connection_speed'):
        for i in range(1000):
            with sw.timer('default'):
                print(i)
                Account("furion")
        for i in range(1000):
            with sw.timer('passtrough'):
                Account("furion", steem=steem)
        # for i in range(100):
        #     with sw.timer('preselected'):
        #         Node().default2()
    print(stopwatch.format_report(sw.get_last_aggregated_report()))
    # Passing an existing steem instance vs. initiating a new one doesn't make much difference:
    # ************************
    # *** StopWatch Report ***
    # ************************
    # connection_speed 3054.388ms (100%)
    #     default 1000 1525.720ms (50%)
    #     passtrough 1000 1501.971ms (49%)
    # Annotations:
def prepare():
    with sw.timer('prepare'):
        if WITH_CLEANER:
            clean_storage()
            print(colored.yellow('Storage cleaned.'))
        collect_used_data()
        print(colored.yellow('Data collected.'))
        cboe_download(check_latest=CHECK_LATEST)
        print(colored.yellow('CBOE data downloaded.'))
        download_futures(last=True, forceDownload=True)
        run_derivatives()
        print(colored.yellow('CBOE futures downloaded.'))
        run_fred()
        print(colored.yellow('FRED data downloaded.'))
        run_quandl(check_latest=CHECK_LATEST)
        print(colored.yellow('Quandl data downloaded.'))
        generate_indicators()
        print(colored.yellow('Indicators generated.'))
        generate_portfolios()
        print(colored.yellow('Portfolios generated.'))
        generate_strategies(check_latest=CHECK_LATEST)
        print(colored.yellow('Strategies generated.'))
    print(format_report(sw.get_last_aggregated_report()))
            agent.load_state_dict(target_agent.state_dict())
            break

    ## CRASH AND RESUME LOGIC:
    if args.prod_mode:
        if not os.path.exists(f"models/{experiment_name}"):
            os.makedirs(f"models/{experiment_name}")
        torch.save(agent.state_dict(), f"{wandb.run.dir}/agent.pt")
        wandb.save(f"agent.pt")

    # TRY NOT TO MODIFY: record rewards for plotting purposes
    writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]['lr'], global_step)
    writer.add_scalar("charts/update", update, global_step)
    writer.add_scalar("losses/value_loss", v_loss.item(), global_step)
    writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step)
    writer.add_scalar("losses/entropy", entropy.mean().item(), global_step)
    writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step)
    if args.kle_stop or args.kle_rollback:
        writer.add_scalar("debug/pg_stop_iter", i_epoch_pi, global_step)
    print("SPS:", int(global_step / (time.time() - start_time)))
    print(stopwatch.format_report(sw.get_last_aggregated_report()))

envs.close()
writer.close()
def act(inputs):
    sw = stopwatch.StopWatch()
    args, experiment_name, i, lock, stats_queue, device, \
        next_obs, next_done, obs, actions, logprobs, rewards, dones, values, traj_availables, \
        rollout_task_queue, policy_request_queues, learner_request_queue = inputs

    def make_env(gym_id, seed, idx):
        env = gym.make(gym_id)
        env = wrap_atari(env)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        env = wrap_deepmind(
            env,
            clip_rewards=True,
            frame_stack=True,
            scale=False,
        )
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env

    envs = [make_env(args.gym_id, args.seed + i, i) for i in range(args.num_envs)]
    envs = np.array(envs, dtype=object)

    # for "Double-buffered" sampling
    policy_request_queue_idx = 0
    for split_idx in range(args.num_env_split):
        policy_request_idxs = []
        for env_idx, env in enumerate(envs[split_idx::args.num_env_split]):
            next_obs[i, split_idx, env_idx, 0, 0] = env.reset()
            next_done[i, split_idx, env_idx, 0, 0] = 0
            policy_request_idxs += [[i, split_idx, env_idx, 0, 0, 0]]
        policy_request_queue_idx = (policy_request_queue_idx + 1) % args.num_policy_workers
        policy_request_queues[policy_request_queue_idx].put(policy_request_idxs)

    last_report = last_report_frames = total_env_frames = 0
    while True:
        with sw.timer('act'):
            with sw.timer('wait_rollout_task_queue'):
                tasks = []
                while len(tasks) == 0:
                    try:
                        tasks = rollout_task_queue.get_many(timeout=0.01)
                    except Empty:
                        pass
            for task in tasks:  # for "Double-buffered" sampling
                with sw.timer('rollouts'):
                    split_idx, step = task
                    policy_request_idxs = []
                    for env_idx, env in enumerate(envs[split_idx::args.num_env_split]):
                        obs[i, split_idx, env_idx, 0, 0, step] = next_obs[i, split_idx, env_idx, 0, 0].copy()
                        dones[i, split_idx, env_idx, 0, 0, step] = next_done[i, split_idx, env_idx, 0, 0]
                        next_obs[i, split_idx, env_idx, 0, 0], r, d, info = env.step(
                            actions[i, split_idx, env_idx, 0, 0, step])
                        if d:
                            next_obs[i, split_idx, env_idx, 0, 0] = env.reset()
                        rewards[i, split_idx, env_idx, 0, 0, step] = r
                        # record the done flag in this env's full storage slot
                        next_done[i, split_idx, env_idx, 0, 0] = d
                        next_step = (step + 1) % args.num_steps
                        policy_request_idxs += [[i, split_idx, env_idx, 0, 0, next_step]]
                        num_frames = 1
                        total_env_frames += num_frames
                        if 'episode' in info.keys():
                            stats_queue.put(info['episode']['l'])
                with sw.timer('policy_request_queue.put'):
                    policy_request_queue_idx = (policy_request_queue_idx + 1) % args.num_policy_workers
                    policy_request_queues[policy_request_queue_idx].put(policy_request_idxs)
        if total_env_frames % 1000 == 0 and i == 0:
            print(stopwatch.format_report(sw.get_last_aggregated_report()))
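
# A small illustration of the "double-buffered" split indexing used in act()
# above: with num_env_split = 2, even-indexed envs form split 0 and
# odd-indexed envs form split 1, so one split can step its environments while
# the policy worker runs inference for the other. Values are illustrative.
import numpy as np

num_envs, num_env_split = 8, 2
envs = np.arange(num_envs)  # stand-ins for environment objects
for split_idx in range(num_env_split):
    print(split_idx, envs[split_idx::num_env_split])
# 0 [0 2 4 6]
# 1 [1 3 5 7]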
def start_policy_worker(inputs):
    # raise
    i, args, experiment_name, lock, stats_queue, device, \
        next_obs, next_done, obs, actions, logprobs, rewards, dones, values, traj_availables, \
        rollout_task_queues, policy_request_queues, learner_request_queue, new_policy_queues = inputs
    sw = stopwatch.StopWatch()
    device = torch.device('cuda')
    agent = Agent(4).to(device)
    min_num_requests = 3
    wait_for_min_requests = 0.01
    # time.sleep(5)
    step = 0
    while True:
        step += 1
        with sw.timer('policy_worker'):
            waiting_started = time.time()
            with sw.timer('policy_requests'):
                policy_requests = []
                while len(policy_requests) < min_num_requests and \
                        time.time() - waiting_started < wait_for_min_requests:
                    try:
                        policy_requests.extend(policy_request_queues[i].get_many(timeout=0.005))
                    except Empty:
                        pass
            if len(policy_requests) == 0:
                continue
            with sw.timer('prepare_data'):
                ls = np.concatenate(policy_requests)
                idxs = tuple(ls.T)
            with sw.timer('index'):
                t1 = next_obs[idxs[:-1]]
            with sw.timer('create array at gpu'):
                next_o = torch.Tensor(t1).to(device, non_blocking=True)
            with sw.timer('prepare_data2'):
                rollout_worker_idxs = ls.T[0, ::args.num_envs // args.num_env_split]
                split_idxs = ls.T[1, ::args.num_envs // args.num_env_split]
                step_idxs = ls.T[-1, ::args.num_envs // args.num_env_split]
            with sw.timer('inference'):
                with torch.no_grad():
                    a, l, _ = agent.get_action(next_o)
                    v = agent.get_value(next_o)
            with sw.timer('move_to_cpu'):
                actions[idxs] = a.cpu()
                logprobs[idxs] = l.cpu()
                values[idxs] = v.flatten().cpu()
            with sw.timer('execute_action'):
                for j in range(len(rollout_worker_idxs)):
                    rollout_worker_idx = rollout_worker_idxs[j]
                    split_idx = split_idxs[j]
                    step_idx = step_idxs[j]
                    rollout_task_queues[rollout_worker_idx].put([split_idx, step_idx])
            with sw.timer('update_policy'):
                try:
                    new_policies = new_policy_queues[i].get_many(timeout=0.005)
                    agent.load_state_dict(new_policies[-1])
                except Empty:
                    pass
            if step % 100 == 0:
                # print(ls.shape)
                print(stopwatch.format_report(sw.get_last_aggregated_report()))
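
# A minimal, runnable sketch of the request-batching policy the policy
# workers above implement: block until either `min_num_requests` requests
# have accumulated or a short deadline expires, then run one batched
# inference. The stdlib Queue and the `get_many` helper are stand-ins for the
# multiprocessing queue (whose `get_many(timeout=...)` the workers call), so
# this runs anywhere; names and defaults mirror the snippets above.
import time
import queue

def get_many(q, timeout):
    """Drain everything currently in q, waiting up to `timeout` for the first item."""
    items = [q.get(timeout=timeout)]  # raises queue.Empty on timeout, like the workers' get_many
    while True:
        try:
            items.append(q.get_nowait())
        except queue.Empty:
            return items

def collect_batch(q, min_num_requests=3, wait_for_min_requests=0.01):
    waiting_started = time.time()
    requests = []
    while len(requests) < min_num_requests and \
            time.time() - waiting_started < wait_for_min_requests:
        try:
            requests.extend(get_many(q, timeout=0.005))
        except queue.Empty:
            pass
    return requests  # may be empty; callers skip the step in that case

# Usage: q = queue.Queue(); producers q.put(...) requests; the worker loop
# calls collect_batch(q) and runs inference once per non-empty batch.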