Code example #1
def start_policy_worker(inputs):
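    # Policy worker: batches observation indices arriving on policy_request_queue,
    # runs the agent on the GPU, writes the chosen actions back into the shared
    # buffers, and hands each rollout worker its next [split_idx, step_idx] task.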
    sw = stopwatch.StopWatch()
    args, experiment_name, i, lock, stats_queue, device, \
        next_obs, next_done, obs, actions, logprobs, rewards, dones, values, traj_availables, \
            rollout_task_queues, policy_request_queue, learner_request_queue = inputs
    device = torch.device('cuda')
    agent = Agent(4).to(device)
    min_num_requests = 6
    wait_for_min_requests = 0.025
    # time.sleep(5)
    step = 0
    while True:
        step += 1
        with sw.timer('policy_worker'):
            waiting_started = time.time()
            policy_requests = []
            with sw.timer('policy_requests'):
                while (len(policy_requests) < min_num_requests
                       and time.time() - waiting_started < wait_for_min_requests):
                    try:
                        policy_requests.extend(
                            policy_request_queue.get_many(timeout=0.005))
                    except Empty:
                        pass
                if len(policy_requests) == 0:
                    continue
            with sw.timer('prepare_data'):
                ls = np.concatenate(policy_requests)
                rollout_worker_idxs = ls.T[0, ::args.num_envs //
                                           args.num_env_split]
                split_idxs = ls.T[1, ::args.num_envs // args.num_env_split]
                step_idxs = ls.T[-1, ::args.num_envs // args.num_env_split]
                idxs = tuple(ls.T)
            with sw.timer('index'):
                t1 = next_obs[idxs[:-1]]
            with sw.timer('create array'):
                t2 = torch.from_numpy(next_obs[idxs[:-1]])
            with sw.timer('convert float'):
                t3 = t2.float()
            with sw.timer('move_to_gpu'):
                next_o = t3.to(device)
                # next_o = torch.from_numpy(next_obs[idxs[:-1]]).float().to(device)
            with sw.timer('inference'):
                with torch.no_grad():
                    a, l, e = agent.get_action(next_o)
            with sw.timer('move_to_cpu'):
                actions[idxs] = a.cpu()
            with sw.timer('execute_action'):
                for j in range(len(rollout_worker_idxs)):
                    rollout_worker_idx = rollout_worker_idxs[j]
                    split_idx = split_idxs[j]
                    step_idx = step_idxs[j]
                    rollout_task_queues[rollout_worker_idx].put(
                        [split_idx, step_idx])
        if step % 1000 == 0:
            print(ls.shape)
            print(stopwatch.format_report(sw.get_last_aggregated_report()))
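All of the examples on this page share one pattern: create a stopwatch.StopWatch(), wrap work in nested sw.timer(...) context managers, and print stopwatch.format_report(sw.get_last_aggregated_report()). Below is a minimal, self-contained sketch of that pattern; the timer names and the sleep are illustrative only, not taken from any of the projects above.

import time
import stopwatch

sw = stopwatch.StopWatch()
with sw.timer('outer'):             # outer scope; child timers are reported indented beneath it
    for _ in range(3):
        with sw.timer('inner'):     # aggregated per name: call count and total time
            time.sleep(0.01)
print(stopwatch.format_report(sw.get_last_aggregated_report()))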
Code example #2
def benchmark_find_local_nodes_impact():
    sw = stopwatch.StopWatch()
    with sw.timer('connection_speed'):
        for i in range(100):
            with sw.timer('default'):
                Node().default()
                # for i in range(100):
                #     with sw.timer('preselected'):
                #         Node().default2()
    print(stopwatch.format_report(sw.get_last_aggregated_report()))
Code example #3
    def start_rollout_worker(self, rollout_worker_idx, env_fns):
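        # Rollout worker: owns a slice of the environments, copies the current
        # observation/done flags into the shared buffers, steps each env with the
        # action written by the policy worker, and requests the next action via
        # policy_request_queue.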
        sw = stopwatch.StopWatch()
        next_obs, next_done, obs, actions, logprobs, rewards, dones, values = self.storage
        env_idxs = range(
            rollout_worker_idx * self.num_envs_per_rollout_worker,
            rollout_worker_idx * self.num_envs_per_rollout_worker +
            self.num_envs_per_rollout_worker)
        envs = [None for _ in range(len(self.env_fns))]
        for env_idx in env_idxs:
            envs[env_idx] = self.env_fns[env_idx]()
            next_step = 0
            self.policy_request_queue.put(
                [next_step, env_idx, rollout_worker_idx])
            next_obs[env_idx] = torch.tensor(envs[env_idx].reset())
            next_done[env_idx] = 0

        local_step = 0
        while True:
            with sw.timer('act'):
                with sw.timer('wait_rollout_task_queue'):
                    tasks = self.rollout_task_queues[
                        rollout_worker_idx].get_many()

                with sw.timer('rollouts'):
                    for task in tasks:
                        step, env_idx = task
                        obs[step, env_idx] = next_obs[env_idx].copy()
                        dones[step, env_idx] = next_done[env_idx].copy()

                        next_obs[env_idx], r, d, info = envs[env_idx].step(
                            actions[step, env_idx])
                        if d:
                            next_obs[env_idx] = envs[env_idx].reset()
                        rewards[step, env_idx] = r
                        next_done[env_idx] = d
                        next_step = step + 1
                        local_step += 1

                        with sw.timer('logging'):
                            self.policy_request_queue.put(
                                [next_step, env_idx, rollout_worker_idx])
                            if 'episode' in info.keys():
                                # print(["charts/episode_reward", info['episode']['r']])
                                # self.stats_queue.put(['l', info['episode']['l']])
                                self.stats_queue.put([
                                    "charts/episode_reward",
                                    info['episode']['r']
                                ])

            if local_step % 1000 == 0:
                print(stopwatch.format_report(sw.get_last_aggregated_report()))
                print()
Code example #4
File: dataloader.py, Project: johnlime/cleanrl
def start_policy_worker(inputs):
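    # DataLoader-based variant of the policy worker: PolicyWorkerDataset wraps the
    # request queue and the observation buffer, so request batching and tensor
    # conversion happen inside the torch DataLoader pipeline.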
    # raise
    args, experiment_name, i, lock, stats_queue, device, \
        next_obs, next_done, obs, actions, logprobs, rewards, dones, values, traj_availables, \
            rollout_task_queues, policy_request_queue, learner_request_queue = inputs
    data_loader = torch.utils.data.DataLoader(PolicyWorkerDataset(policy_request_queue, obs), batch_size=200) #, num_workers=2, pin_memory=True
    sw = stopwatch.StopWatch()
    device = torch.device('cuda')
    agent = Agent(4).to(device)
    min_num_requests = 3
    wait_for_min_requests = 0.01
    # time.sleep(5)
    step = 0
    for batch_idx, (ls, next_o) in enumerate(data_loader):
        step += 1
        with sw.timer('policy_worker'):
            with sw.timer('create array at gpu'):
                next_o = next_o.to(device, non_blocking=True)
            with sw.timer("prepare_data"):
                ls = ls.numpy()
                rollout_worker_idxs = ls.T[0,::args.num_envs//args.num_env_split]
                split_idxs = ls.T[1,::args.num_envs//args.num_env_split]
                step_idxs = ls.T[-1,::args.num_envs//args.num_env_split]
                idxs = tuple(ls.T)
            with sw.timer('inference'):
                with torch.no_grad():
                    a, l, e = agent.get_action(next_o)
            with sw.timer('move_to_cpu'):
                actions[idxs] = a.cpu()
            with sw.timer('execute_action'):
                for j in range(len(rollout_worker_idxs)):
                    rollout_worker_idx = rollout_worker_idxs[j]
                    split_idx = split_idxs[j]
                    step_idx = step_idxs[j]
                    rollout_task_queues[rollout_worker_idx].put([split_idx,step_idx])

                # for idx, item in enumerate(idxs):
                #     rollout_worker_idx = item[0]
                #     split_idx = item[1]
                #     step_idx = item[2]
                #     with sw.timer('put_action'):
                #         rollout_task_queues[rollout_worker_idx].put([split_idx,step_idx])
            
                # actions[idxs] = a.cpu()
                # for j in range(len(rollout_worker_idxs)):
                #     rollout_worker_idx = rollout_worker_idxs[j]
                #     split_idx = split_idxs[j]
                #     step_idx = step_idxs[j]
        if step % 100 == 0:
            # print(ls.shape)
            print(stopwatch.format_report(sw.get_last_aggregated_report()))
Code example #5
def benchmark_steem_passtrough():
    sw = stopwatch.StopWatch()
    steem = Node().default()
    with sw.timer('connection_speed'):
        for i in range(1000):
            with sw.timer('default'):
                print(i)
                Account("furion")
        for i in range(1000):
            with sw.timer('passtrough'):
                Account("furion", steem=steem)
                # for i in range(100):
                #     with sw.timer('preselected'):
                #         Node().default2()
    print(stopwatch.format_report(sw.get_last_aggregated_report()))
Code example #6
File: test_stopwatch.py, Project: timabbott/stopwatch
    def test_format_report(self):
        sw = StopWatch()
        add_timers(sw)

        agg_report = sw.get_last_aggregated_report()
        formatted_report = format_report(agg_report)
        assert formatted_report == \
            "************************\n" \
            "*** StopWatch Report ***\n" \
            "************************\n" \
            "root                    900000.000 (100%)\n" \
            "    BUCKET_A        child1                  2  240000.000 (27%)\n" \
            "                        grand_children1         1  20000.000 (2%)\n" \
            "                        grand_children2         2  80000.000 (9%)\n" \
            "                        grand_children3         1  10000.000 (1%)\n" \
            "    BUCKET_B        child2                  1  560000.000 (62%)\n" \
            "                        grand_children1         1  260000.000 (29%)\n" \
            "                        grand_children3         1  10000.000 (1%)\n" \
            "Tags: Cooltag, Slowtag"
Code example #7
    def test_format_report(self):
        sw = StopWatch()
        add_timers(sw)

        agg_report = sw.get_last_aggregated_report()
        formatted_report = format_report(agg_report)
        assert formatted_report == \
            "root                    900000.000ms (100%)\n" \
            "    BUCKET_A        child1                  2  240000.000ms (27%)\n" \
            "                        grand_children1         1  20000.000ms (2%)\n" \
            "                        grand_children2         2  80000.000ms (9%)\n" \
            "                        grand_children3         1  10000.000ms (1%)\n" \
            "    BUCKET_B        child2                  1  560000.000ms (62%)\n" \
            "                        grand_children1         1  260000.000ms (29%)\n" \
            "                        grand_children3         1  10000.000ms (1%)\n" \
            "Annotations: Cooltag, Slowtag"

        formatted_report2 = sw.format_last_report()
        assert formatted_report == formatted_report2
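The last assertion in this test implies that sw.format_last_report() is a shorthand for format_report(sw.get_last_aggregated_report()). A small sketch of that equivalence, assuming the same imports the test file above relies on:

from stopwatch import StopWatch, format_report

sw = StopWatch()
with sw.timer('work'):
    pass
# Per the test above, the shorthand and the explicit call produce the same report
assert sw.format_last_report() == format_report(sw.get_last_aggregated_report())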
Code example #8
File: app.py, Project: etalab-ia/pseudo_api
def run_pseudonymize_request():
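    # Note: request/jsonify (Flask), sw (a StopWatch), TAGGER, logger, and
    # update_stats are not defined in this snippet; they are presumably
    # module-level objects in app.py.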
    data = {"success": False}
    stats_dict = SqliteDict('./api_stats.sqlite', autocommit=True)
    output_types = ["pseudonymized", "tagged", "conll"]
    try:
        if not request.form.get("output_type"):
            logging.info(
                "No tags were indicated. I will give you the text pseudonymized."
            )
            output_type = "pseudonymized"
        else:
            output_type = request.form.get("output_type")
            if output_type not in output_types:
                logging.warning(
                    "Your output type is not supported. I will give you the text pseudonymized."
                )
                output_type = "pseudonymized"

        if request.form.get("text"):
            text = request.form.get("text")
            logging.info("Tagging text with model...")
            # Predict and return a CoNLL string to send to the web demo app
            output, analysis_ner_stats = prepare_output(
                text=text, tagger=TAGGER, output_type=output_type)
            data["text"] = output
            data["success"] = True
            # stats_dict[:]
    except Exception as e:
        logger.error(e)
    finally:
        logger.info(stopwatch.format_report(sw.get_last_aggregated_report()))
        if data["success"]:
            update_stats(analysis_stats=stats_dict,
                         analysis_ner_stats=analysis_ner_stats,
                         time_info=sw.get_last_aggregated_report(),
                         output_type=output_type)
        logger.info(json.dumps(dict(stats_dict), indent=4))
        stats_dict.close()
        return jsonify(data)
Code example #9
File: node.py, Project: GwenNelson/steemtools
def benchmark_steem_passtrough():
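    # Benchmark: creating a fresh Node().default() connection per Account() call
    # vs reusing one steem handle; per the report pasted below, the difference
    # is negligible.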
    sw = stopwatch.StopWatch()
    steem = Node().default()
    with sw.timer('connection_speed'):
        for i in range(1000):
            with sw.timer('default'):
                print(i)
                Account("furion")
        for i in range(1000):
            with sw.timer('passtrough'):
                Account("furion", steem=steem)
        # for i in range(100):
        #     with sw.timer('preselected'):
        #         Node().default2()
    print(stopwatch.format_report(sw.get_last_aggregated_report()))
# passing vs initiating a new doesn't make much difference
# ************************
# *** StopWatch Report ***
# ************************
# connection_speed        3054.388ms (100%)
#                     default              1000  1525.720ms (50%)
#                     passtrough           1000  1501.971ms (49%)
# Annotations:
Code example #10
def prepare():
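    # sw is not defined in this snippet (presumably a module-level StopWatch);
    # every data-preparation step below runs inside the 'prepare' timer scope.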
    with sw.timer('prepare'):
        if WITH_CLEANER:
            clean_storage()
            print(colored.yellow('Storage cleaned.'))
        collect_used_data()
        print(colored.yellow('Data collected.'))
        cboe_download(check_latest=CHECK_LATEST)
        print(colored.yellow('CBOE data downloaded.'))
        download_futures(last=True, forceDownload=True)
        run_derivatives()
        print(colored.yellow('CBOE futures downloaded.'))
        run_fred()
        print(colored.yellow('FRED data downloaded.'))
        run_quandl(check_latest=CHECK_LATEST)
        print(colored.yellow('Quandl data downloaded.'))
        generate_indicators()
        print(colored.yellow('Indicators generated.'))
        generate_portfolios()
        print(colored.yellow('Portfolios generated.'))
        generate_strategies(check_latest=CHECK_LATEST)
        print(colored.yellow('Strategies generated.'))
    print(format_report(sw.get_last_aggregated_report()))
Code example #11
                        agent.load_state_dict(target_agent.state_dict())
                        break

            ## CRASH AND RESUME LOGIC:
            if args.prod_mode:
                if not os.path.exists(f"models/{experiment_name}"):
                    os.makedirs(f"models/{experiment_name}")
                torch.save(agent.state_dict(), f"{wandb.run.dir}/agent.pt")
                wandb.save(f"agent.pt")

            # TRY NOT TO MODIFY: record rewards for plotting purposes
            writer.add_scalar("charts/learning_rate",
                              optimizer.param_groups[0]['lr'], global_step)
            writer.add_scalar("charts/update", update, global_step)
            writer.add_scalar("losses/value_loss", v_loss.item(), global_step)
            writer.add_scalar("losses/policy_loss", pg_loss.item(),
                              global_step)
            writer.add_scalar("losses/entropy",
                              entropy.mean().item(), global_step)
            writer.add_scalar("losses/approx_kl", approx_kl.item(),
                              global_step)
            if args.kle_stop or args.kle_rollback:
                writer.add_scalar("debug/pg_stop_iter", i_epoch_pi,
                                  global_step)
            print("SPS:", int(global_step / (time.time() - start_time)))

    print(stopwatch.format_report(sw.get_last_aggregated_report()))

envs.close()
writer.close()
Code example #12
File: dataloader.py, Project: johnlime/cleanrl
def act(inputs):
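    # Actor: builds num_envs Atari environments, splits them into num_env_split
    # groups for double-buffered sampling, and round-robins policy requests
    # across the policy workers' request queues.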
    sw = stopwatch.StopWatch()
    args, experiment_name, i, lock, stats_queue, device, \
        next_obs, next_done, obs, actions, logprobs, rewards, dones, values, traj_availables, \
            rollout_task_queue, policy_request_queues, learner_request_queue = inputs
    envs = []
    
    def make_env(gym_id, seed, idx):
        env = gym.make(gym_id)
        env = wrap_atari(env)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        env = wrap_deepmind(
            env,
            clip_rewards=True,
            frame_stack=True,
            scale=False,
        )
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env
    envs = [make_env(args.gym_id, args.seed+i, i) for i in range(args.num_envs)]
    envs = np.array(envs, dtype=object)

    # for "Double-buffered" sampling
    policy_request_queue_idx = 0
    for split_idx in range(args.num_env_split):
        policy_request_idxs = []
        for env_idx, env in enumerate(envs[split_idx::args.num_env_split]):
            next_obs[i,split_idx,env_idx,0,0] = env.reset()
            next_done[i,split_idx,env_idx,0,0] = 0
            policy_request_idxs += [[i,split_idx,env_idx,0,0,0]]
        policy_request_queue_idx = (policy_request_queue_idx + 1) % args.num_policy_workers
        policy_request_queues[policy_request_queue_idx].put(policy_request_idxs)

    last_report = last_report_frames = total_env_frames = 0
    
    while True:
        with sw.timer('act'):
            with sw.timer('wait_rollout_task_queue'):
                tasks = []
                while len(tasks) == 0:
                    try:
                        tasks = rollout_task_queue.get_many(timeout=0.01)
                    except Empty:
                        pass
            for task in tasks:
                # for "Double-buffered" sampling
                with sw.timer('rollouts'):
                    split_idx, step = task
                    policy_request_idxs = []
                    for env_idx, env in enumerate(envs[split_idx::args.num_env_split]):
                        obs[i,split_idx,env_idx,0,0,step] = next_obs[i,split_idx,env_idx,0,0].copy()
                        dones[i,split_idx,env_idx,0,0,step] = next_done[i,split_idx,env_idx,0,0]
                        next_obs[i,split_idx,env_idx,0,0], r, d, info = env.step(actions[i,split_idx,env_idx,0,0,step])
                        if d:
                            next_obs[i,split_idx,env_idx,0,0] = env.reset()
                        rewards[i,split_idx,env_idx,0,0,step] = r
                        next_done[i,split_idx,env_idx,0,0] = d
                        
                        next_step = (step + 1) % args.num_steps  
                        policy_request_idxs += [[i,split_idx,env_idx,0,0,next_step]]
        
                        num_frames = 1
                        total_env_frames += num_frames
            
                        if 'episode' in info.keys():
                            stats_queue.put(info['episode']['l'])
                with sw.timer('policy_request_queue.put'):
                    policy_request_queue_idx = (policy_request_queue_idx + 1) % args.num_policy_workers
                    policy_request_queues[policy_request_queue_idx].put(policy_request_idxs)
        if total_env_frames % 1000 == 0 and i == 0:
            print(stopwatch.format_report(sw.get_last_aggregated_report()))
Code example #13
def start_policy_worker(inputs):
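    # Policy worker variant: also stores log-probs and values in the shared
    # buffers and periodically hot-swaps in updated weights received on
    # new_policy_queues.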
    # raise
    i, args, experiment_name, lock, stats_queue, device, \
        next_obs, next_done, obs, actions, logprobs, rewards, dones, values, traj_availables,\
        rollout_task_queues, policy_request_queues, learner_request_queue, new_policy_queues = inputs
    sw = stopwatch.StopWatch()
    device = torch.device('cuda')
    agent = Agent(4).to(device)
    min_num_requests = 3
    wait_for_min_requests = 0.01
    # time.sleep(5)
    step = 0
    while True:
        step += 1
        with sw.timer('policy_worker'):
            waiting_started = time.time()
            with sw.timer('policy_requests'):
                policy_requests = []
                while (len(policy_requests) < min_num_requests
                       and time.time() - waiting_started < wait_for_min_requests):
                    try:
                        policy_requests.extend(
                            policy_request_queues[i].get_many(timeout=0.005))
                    except Empty:
                        pass
                if len(policy_requests) == 0:
                    continue
            with sw.timer('prepare_data'):
                ls = np.concatenate(policy_requests)
                idxs = tuple(ls.T)
            with sw.timer('index'):
                t1 = next_obs[idxs[:-1]]
            with sw.timer('create array at gpu'):
                next_o = torch.Tensor(t1).to(device, non_blocking=True)
            with sw.timer('prepare_data2'):
                rollout_worker_idxs = ls.T[0, ::args.num_envs //
                                           args.num_env_split]
                split_idxs = ls.T[1, ::args.num_envs // args.num_env_split]
                step_idxs = ls.T[-1, ::args.num_envs // args.num_env_split]
            with sw.timer('inference'):
                with torch.no_grad():
                    a, l, _ = agent.get_action(next_o)
                    v = agent.get_value(next_o)
            with sw.timer('move_to_cpu'):
                actions[idxs] = a.cpu()
                logprobs[idxs] = l.cpu()
                values[idxs] = v.flatten().cpu()
            with sw.timer('execute_action'):
                for j in range(len(rollout_worker_idxs)):
                    rollout_worker_idx = rollout_worker_idxs[j]
                    split_idx = split_idxs[j]
                    step_idx = step_idxs[j]
                    rollout_task_queues[rollout_worker_idx].put(
                        [split_idx, step_idx])
            with sw.timer('update_policy'):
                try:
                    new_policies = new_policy_queues[i].get_many(timeout=0.005)
                    agent.load_state_dict(new_policies[-1])
                except Empty:
                    pass
        if step % 100 == 0:
            # print(ls.shape)
            print(stopwatch.format_report(sw.get_last_aggregated_report()))