Example #1
def env_step(rank, args, action_logits, values, observations, rollout_storages,
             wait, done_list, step_dones, please_load_model,
             please_load_model_actor, shared_cpu_actor_critics,
             shared_cpu_actor_critics_env_actor, all_episode_scores,
             vgl_display):
    """

    The environment process reads the action logits for its slot from the shared
    action buffer, samples an action from them, executes the action, and writes
    the next observation to the shared observation buffer. The resulting
    transition tuples are stored in the rollout storage.

    Args:
        rank: Environment process id.
        args: Command-line arguments.
        action_logits: A shared PyTorch tensor serving as the action buffer.
        values: A shared PyTorch tensor serving as the value buffer.
        observations: A shared PyTorch tensor serving as the observation buffer.
        rollout_storages: A list of two rollout storages.
        wait: A shared list indicating whether environment processes are waiting
            for an updated model.
        done_list: A shared list indicating whether environment processes have
            finished all steps.
        step_dones: A shared array used to signal that an environment process
            has finished one environment step.
        please_load_model: A shared integer. Set to zero once the updated model
            from the learner has been loaded.
        please_load_model_actor: A shared array between the actors and
            environment process 0. It is set to one when an updated model is
            available.
        shared_cpu_actor_critics: A list of shared models containing the updated
            parameters from the learner.
        shared_cpu_actor_critics_env_actor: Shared models between the actor and
            environment processes. Actor processes load models from environment
            process 0.
        all_episode_scores: A shared list that collects episode scores from all
            environment processes.
        vgl_display: Value assigned to the VGL_DISPLAY environment variable for
            rendering.

    Returns:
        None
    """

    os.environ['VGL_DISPLAY'] = vgl_display
    torch.manual_seed(args.seed + rank)

    env = football_env.create_environment(
        representation=args.representation,
        env_name=args.env_name,
        stacked=('stacked' in args.state),
        rewards=args.reward_experiment,
        logdir=args.log_dir,
        render=args.render and (args.seed == 0),
        dump_frequency=50 if args.render and args.seed == 0 else 0,
        other_config_options={'game_engine_random_seed': args.seed + rank})
    env = EpisodeRewardScoreWrapper(env,
                                    number_of_left_players_agent_controls=1,
                                    number_of_right_players_agent_controls=0)
    env.seed(args.seed + rank)
    if args.noop > 0:
        env = GFNoopResetEnv(env, noop_max=args.noop, seed=args.seed + rank)

    if args.num_agents == 1:
        from a2c_ppo_acktr.envs import ObsUnsqueezeWrapper
        env = ObsUnsqueezeWrapper(env)
    env = EpisodeRewardScoreWrapper(
        env,
        number_of_left_players_agent_controls=args.num_left_agents,
        number_of_right_players_agent_controls=args.num_right_agents)
    step_dones_np = np.frombuffer(step_dones.get_obj(), dtype=np.int32)
    step_dones_np = step_dones_np.reshape(args.num_processes)

    obs = env.reset()
    aug_feat_dim = 0

    # Rollout storage local to this process. After args.sync_every steps, its
    # contents are batch-copied into the shared rollout_storages.
    local_rollouts = RolloutStorageMA(args.sync_every,
                                      1,
                                      env.observation_space.shape[1:],
                                      env.action_space if args.num_agents == 1
                                      else Discrete(env.action_space.nvec[0]),
                                      recurrent_hidden_state_size=1,
                                      num_agents=args.num_agents,
                                      aug_size=aug_feat_dim)

    observations[rank] = torch.from_numpy(obs)
    step_dones_np[rank] = 1

    local_rollouts.obs[0].copy_(torch.from_numpy(obs).float().unsqueeze(0))
    num_steps = int(math.ceil(args.num_env_steps / args.num_processes))
    recurrent_hidden_states = torch.ones(1)
    print('Num of steps per environment', num_steps)
    sync_count = 0
    target_eval_step = 0

    if rank == 0:
        plot = {
            'steps': [],
            'avg_scores': [],
            'time_elapsed': [],
            'fps': [],
            'avg_rewards': [],
            'final_scores': [],
            'final_rewards': [],
            'fps_one_sync': []
        }
    scores = []
    episode_rewards = []
    start_sync = time.time()
    start_rollout = time.time()
    env_step_timer_start = time.time()
    if args.dump_traj_flag:
        prev_obs = copy.deepcopy(obs)
        dump_traj = {'action': [], 'obs': [], 'action_logit': [], 'v': []}
    for step in range(num_steps):
        # Spin until an actor clears this process's step flag, signalling that
        # fresh action logits and a value estimate are in the shared buffers.
        while step_dones_np[rank] != 0:
            pass
        value_pred = values[rank].clone()
        dist = Categorical(logits=copy.deepcopy(action_logits[rank]))
        action = dist.sample()
        action_log_prob = dist.log_probs(action)
        obs, reward, done, infos = env.step(action.numpy().reshape(-1))
        if args.dump_traj_flag:
            dump_traj['action'].append(action)
            dump_traj['obs'].append(prev_obs)
            dump_traj['action_logit'].append(copy.deepcopy(
                action_logits[rank]))
            dump_traj['v'].append(value_pred)
        if done:
            if rank == 0:
                scores.append(infos['episode_score'])
                sys.stdout.flush()
            obs = env.reset()
            episode_rewards.append(
                np.sum(infos['episode_reward'][:args.num_left_agents]))
            all_episode_scores.append(infos['episode_score'])

        prev_obs = copy.deepcopy(obs)
        aug_obs = None
        obs = torch.from_numpy(obs)
        observations[rank] = obs

        masks = torch.FloatTensor([0.0]) if done else torch.FloatTensor([1.0])
        bad_masks = torch.FloatTensor([
            0.0
        ]) if 'bad_transition' in infos.keys() else torch.FloatTensor([1.0])
        reward = torch.FloatTensor([reward])

        local_rollouts.insert(obs,
                              recurrent_hidden_states,
                              action,
                              action_log_prob,
                              values[rank],
                              reward.resize_(1, args.num_agents, 1),
                              masks,
                              bad_masks,
                              aug_obs=aug_obs)

        if step % args.sync_every == 0 and step != 0:
            per_sync_time = time.time() - start_sync
            if sync_count == 19 and args.dump_traj_flag:
                import pickle
                with open(
                        '/tmp/traj_run{}_{}.pkl'.format(
                            args.dump_run_id, rank), 'wb') as output:
                    pickle.dump(dump_traj, output, pickle.HIGHEST_PROTOCOL)
                    print("INFO: Dump trajectories for testing")
            # Copy local rollout to rollout_storage for training
            st_idx = sync_count % 2
            rollout_storages[st_idx].single_process_batch_insert(
                rank,
                local_rollouts.obs,
                local_rollouts.recurrent_hidden_states,
                local_rollouts.actions,
                local_rollouts.action_log_probs,
                local_rollouts.value_preds,
                local_rollouts.rewards,
                local_rollouts.masks,
                local_rollouts.bad_masks,
                aug_obs=local_rollouts.aug_obs)
            local_rollouts.after_update()
            sync_count += 1
            if rank == 0:
                print(
                    'Rollout time                 : {:.6f} s\n'
                    'Last {} episode average score: {:.6f} rew: {:.6f}'.format(
                        time.time() - start_rollout, 10, np.mean(scores[-10:]),
                        np.mean(episode_rewards[-args.num_processes:])))
                sys.stdout.flush()
            wait[rank] = True

            if rank == 0 and sync_count % 100 == 0:
                total_steps = sync_count * args.num_processes * args.sync_every
                target_eval_step += args.eval_freq
                plot['avg_scores'].append(np.mean(scores[-10:]))
                plot['final_scores'].append(np.mean(plot['avg_scores'][-10:]))
                plot['steps'].append(total_steps)
                time_elapsed = time.time() - env_step_timer_start
                plot['time_elapsed'].append(time_elapsed)
                plot['fps'].append(total_steps // time_elapsed)
                plot['fps_one_sync'].append(args.num_processes *
                                            args.sync_every // per_sync_time)
                plot['avg_rewards'].append(np.mean(episode_rewards[-10:]))
                plot['final_rewards'].append(np.mean(
                    plot['avg_rewards'][-10:]))
                curve_file_path = os.path.join(args.log_dir, args.exp_name,
                                               'rank0_curve.csv')
                dict2csv(plot, curve_file_path)
                print('Wrote training curve to ', curve_file_path)
                sys.stdout.flush()

            while True:
                # Load the updated model for actor, ask actor to load,
                # wait for ack from actor, and send ack signal back to learner.
                if rank == 0 and please_load_model.value == 1:
                    for agent_idx in range(args.num_agents):
                        stat_dict = shared_cpu_actor_critics[
                            agent_idx].state_dict()
                        shared_cpu_actor_critics_env_actor[
                            agent_idx].load_state_dict(stat_dict)
                    please_load_model_actor[:] = 1
                    # Spin until every actor has acknowledged loading the new model.
                    while not torch.all(please_load_model_actor == 0).item():
                        pass
                    please_load_model.value = 0
                if not wait[rank]:
                    break
            start_sync = time.time()
            start_rollout = time.time()
        step_dones_np[rank] = 1

    done_list[rank] = True
    print('Done env ', rank)
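The busy-wait handshake in env_step relies on shared-memory tensors and per-process integer flags: the environment writes an observation, raises its flag, and spins until an actor has written action logits and cleared the flag. Below is a minimal, self-contained sketch of that pattern, not the project's actual code; the tensor shapes, the dummy "policy", and the process layout are illustrative assumptions.

import torch
import torch.multiprocessing as mp


def env_proc(rank, observations, action_logits, step_dones, n_steps=5):
    for step in range(n_steps):
        observations[rank].normal_()       # stand-in for env.step() output
        step_dones[rank] = 1               # signal: observation ready
        while step_dones[rank] == 1:       # spin until the actor replies
            pass
        action = torch.distributions.Categorical(
            logits=action_logits[rank]).sample()
        print('env', rank, 'step', step, 'action', int(action))


def actor_proc(rank, observations, action_logits, step_dones, n_steps=5):
    for step in range(n_steps):
        while step_dones[rank] == 0:       # spin until an observation arrives
            pass
        # dummy "policy": reuse a slice of the observation as logits
        action_logits[rank] = observations[rank][:19]
        step_dones[rank] = 0               # signal: logits ready, obs consumed


if __name__ == '__main__':
    observations = torch.zeros(1, 115).share_memory_()
    action_logits = torch.zeros(1, 19).share_memory_()
    step_dones = torch.zeros(1, dtype=torch.int32).share_memory_()
    procs = [mp.Process(target=env_proc,
                        args=(0, observations, action_logits, step_dones)),
             mp.Process(target=actor_proc,
                        args=(0, observations, action_logits, step_dones))]
    for p in procs:
        p.start()
    for p in procs:
        p.join()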
Example #2
def eval_model_q(test_q, done_training, args):
    plot = {'good_rewards': [], 'adversary_rewards': [], 'rewards': [], 'steps': [], 'q_loss': [], 'gcn_q_loss': [],
            'p_loss': [], 'final': [], 'abs': []}
    best_eval_reward = -100000000
    while True:
        if not test_q.empty():
            print('=================== start eval ===================')
            eval_env = make_env(args.scenario, args)
            eval_env.seed(args.seed + 10)
            eval_rewards = []
            good_eval_rewards = []
            agent, tr_log = test_q.get()
            with temp_seed(args.seed):
                for n_eval in range(args.num_eval_runs):
                    obs_n = eval_env.reset()
                    episode_reward = 0
                    episode_step = 0
                    n_agents = eval_env.n
                    agents_rew = [[] for _ in range(n_agents)]
                    while True:
                        action_n = agent.select_action(torch.Tensor(obs_n), action_noise=True,
                                                       param_noise=False).squeeze().cpu().numpy()
                        next_obs_n, reward_n, done_n, _ = eval_env.step(action_n)
                        episode_step += 1
                        terminal = (episode_step >= args.num_steps)
                        episode_reward += np.sum(reward_n)
                        for i, r in enumerate(reward_n):
                            agents_rew[i].append(r)
                        obs_n = next_obs_n
                        if done_n[0] or terminal:
                            eval_rewards.append(episode_reward)
                            agents_rew = [np.sum(rew) for rew in agents_rew]
                            good_reward = np.sum(agents_rew)
                            good_eval_rewards.append(good_reward)
                            if n_eval % 100 == 0:
                                print('test reward', episode_reward)
                            break
                if np.mean(eval_rewards) > best_eval_reward:
                    best_eval_reward = np.mean(eval_rewards)
                    torch.save({'agents': agent}, os.path.join(tr_log['exp_save_dir'], 'agents_best.ckpt'))

                plot['rewards'].append(np.mean(eval_rewards))
                plot['steps'].append(tr_log['total_numsteps'])
                plot['q_loss'].append(tr_log['value_loss'])
                plot['p_loss'].append(tr_log['policy_loss'])
                print("========================================================")
                print("Episode: {}, total numsteps: {}, {} eval runs, total time: {} s".format(
                    tr_log['i_episode'], tr_log['total_numsteps'],
                    args.num_eval_runs, time.time() - tr_log['start_time']))
                print("GOOD reward: avg {} std {}, average reward: {}, best reward {}".format(
                    np.mean(eval_rewards), np.std(eval_rewards),
                    np.mean(plot['rewards'][-10:]), best_eval_reward))
                plot['final'].append(np.mean(plot['rewards'][-10:]))
                plot['abs'].append(best_eval_reward)
                dict2csv(plot, os.path.join(tr_log['exp_save_dir'], 'train_curve.csv'))
                eval_env.close()
        if done_training.value and test_q.empty():
            torch.save({'agents': agent}, os.path.join(tr_log['exp_save_dir'], 'agents.ckpt'))
            break
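Every example in this listing ends by calling dict2csv to dump the accumulated metrics. The helper itself is not shown; a minimal sketch compatible with the positional call style used here (a dict mapping column names to lists of values) could look like the following. Note that the later examples use a keyword-argument variant (dict2csv(args=..., csv_name=...)) that writes scalar values and would need a slightly different helper.

import csv


def dict2csv(data, csv_name):
    """Write a dict of column name -> list of values as a CSV file."""
    keys = list(data.keys())
    n_rows = max((len(v) for v in data.values()), default=0)
    with open(csv_name, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(keys)
        for i in range(n_rows):
            # pad shorter columns so rows stay aligned
            writer.writerow([data[k][i] if i < len(data[k]) else ''
                             for k in keys])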
Example #3
File: eval.py Project: zixianma/PIC
def eval_model_q(test_q, done_training, args):
    if 'simple_tag' in args.scenario:
        plot = {
            'good_rewards': [],
            'adversary_rewards': [],
            'rewards': [],
            'collisions': [],
            'dists': [],
            'steps': [],
            'q_loss': [],
            'gcn_q_loss': [],
            'p_loss': [],
            'final': [],
            'abs': []
        }
    elif 'simple_coop_push' in args.scenario:
        plot = {
            'good_rewards': [],
            'adversary_rewards': [],
            'rewards': [],
            'collisions': [],
            'avg_dists': [],
            'occupied_targets': [],
            'steps': [],
            'q_loss': [],
            'gcn_q_loss': [],
            'p_loss': [],
            'final': [],
            'abs': []
        }
    elif 'spread' in args.scenario:
        plot = {
            'good_rewards': [],
            'adversary_rewards': [],
            'rewards': [],
            'collisions': [],
            'min_dists': [],
            'occupied_targets': [],
            'steps': [],
            'q_loss': [],
            'gcn_q_loss': [],
            'p_loss': [],
            'final': [],
            'abs': []
        }
    else:
        plot = {
            'good_rewards': [],
            'adversary_rewards': [],
            'rewards': [],
            'steps': [],
            'q_loss': [],
            'gcn_q_loss': [],
            'p_loss': [],
            'final': [],
            'abs': []
        }
    best_eval_reward = -100000000
    while True:
        if not test_q.empty():
            print('=================== start eval ===================')
            eval_env = make_env(args.scenario, args, benchmark=True)
            eval_env.seed(args.seed + 10)
            eval_rewards = []
            good_eval_rewards = []
            if 'simple_coop_push' in args.scenario or 'spread' in args.scenario:
                eval_occupied_targets = []
            eval_collisions = []
            eval_dists = []
            agent, tr_log = test_q.get()
            num_adversaries = eval_env.world.num_adversaries if hasattr(
                eval_env.world, 'num_adversaries') else 0
            with temp_seed(args.seed):
                for n_eval in range(args.num_eval_runs):
                    obs_n = eval_env.reset()
                    episode_reward = 0
                    episode_step = 0
                    n_agents = eval_env.n
                    agents_rew = [[] for _ in range(n_agents)]
                    if 'simple_tag' in args.scenario:
                        episode_benchmark = [0 for _ in range(2)]
                    elif 'simple_coop_push' in args.scenario or 'spread' in args.scenario:
                        episode_benchmark = [0 for _ in range(3)]
                    while True:
                        action_n = agent.select_action(
                            torch.Tensor(obs_n),
                            action_noise=True,
                            param_noise=False).squeeze().cpu().numpy()
                        next_obs_n, reward_n, done_n, info_n = eval_env.step(
                            action_n)
                        benchmark_n = np.asarray(info_n['n'])
                        episode_step += 1
                        if "simple_tag" in args.scenario:
                            # collisions for adversaries only
                            episode_benchmark[0] += sum(
                                benchmark_n[:num_adversaries, 0])
                            # min distance for good agents only
                            episode_benchmark[1] += sum(
                                benchmark_n[num_adversaries:, 1])
                        elif 'simple_coop_push' in args.scenario or 'spread' in args.scenario:
                            for i in range(len(episode_benchmark)):
                                episode_benchmark[i] += sum(benchmark_n[:, i])

                        terminal = (episode_step >= args.num_steps)

                        episode_reward += np.sum(reward_n)
                        for i, r in enumerate(reward_n):
                            agents_rew[i].append(r)
                        obs_n = next_obs_n
                        if done_n[0] or terminal:
                            eval_rewards.append(episode_reward)
                            agents_rew = [np.sum(rew) for rew in agents_rew]
                            good_reward = np.sum(agents_rew)
                            good_eval_rewards.append(good_reward)
                            eval_collisions.append(episode_benchmark[0])
                            eval_dists.append(episode_benchmark[1])
                            if 'simple_coop_push' in args.scenario or 'spread' in args.scenario:
                                eval_occupied_targets.append(
                                    episode_benchmark[2])
                            if n_eval % 100 == 0:
                                print('test reward', episode_reward)
                            break
                if np.mean(eval_rewards) > best_eval_reward:
                    best_eval_reward = np.mean(eval_rewards)
                    torch.save({'agents': agent},
                               os.path.join(tr_log['exp_save_dir'],
                                            'agents_best.ckpt'))

                plot['rewards'].append(np.mean(eval_rewards))
                if 'simple_tag' in args.scenario:
                    plot['collisions'].append(np.mean(eval_collisions))
                    plot['dists'].append(np.mean(eval_dists))
                elif 'simple_coop_push' in args.scenario:
                    plot['collisions'].append(np.mean(eval_collisions))
                    plot['avg_dists'].append(np.mean(eval_dists))
                    plot['occupied_targets'].append(
                        np.mean(eval_occupied_targets))
                elif 'spread' in args.scenario:
                    plot['collisions'].append(np.mean(eval_collisions))
                    plot['min_dists'].append(np.mean(eval_dists))
                    plot['occupied_targets'].append(
                        np.mean(eval_occupied_targets))
                plot['steps'].append(tr_log['total_numsteps'])
                plot['q_loss'].append(tr_log['value_loss'])
                plot['p_loss'].append(tr_log['policy_loss'])
                print(
                    "========================================================")
                print(
                    "Episode: {}, total numsteps: {}, {} eval runs, total time: {} s"
                    .format(tr_log['i_episode'], tr_log['total_numsteps'],
                            args.num_eval_runs,
                            time.time() - tr_log['start_time']))
                print(
                    "GOOD reward: avg {} std {}, average reward: {}, best reward {}, "
                    "average collision {}, average dist {}".format(
                        np.mean(eval_rewards), np.std(eval_rewards),
                        np.mean(plot['rewards'][-10:]), best_eval_reward,
                        np.mean(eval_collisions), np.mean(eval_dists)))
                plot['final'].append(np.mean(plot['rewards'][-10:]))
                plot['abs'].append(best_eval_reward)
                dict2csv(
                    plot,
                    os.path.join(tr_log['exp_save_dir'], 'train_curve.csv'))

                eval_env.close()
        if done_training.value and test_q.empty():
            torch.save({'agents': agent},
                       os.path.join(tr_log['exp_save_dir'], 'agents.ckpt'))
            break
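make_env() and the per-step benchmark data info_n['n'] consumed above come from the OpenAI multi-agent particle environments. The project's wrapper is not shown (it also takes an args object); a sketch of the standard factory from the multiagent-particle-envs repository, which shows where the benchmark info originates, is given below as an assumption about how the project's make_env is likely built.

from multiagent.environment import MultiAgentEnv
import multiagent.scenarios as scenarios


def make_env(scenario_name, benchmark=False):
    # Load the scenario module (e.g. 'simple_spread') and build its world.
    scenario = scenarios.load(scenario_name + '.py').Scenario()
    world = scenario.make_world()
    if benchmark:
        # scenario.benchmark_data becomes the per-step info_n['n'] used above.
        env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                            scenario.observation, scenario.benchmark_data)
    else:
        env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                            scenario.observation)
    return env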
Example #4

                        workers=args.workers,
                        p=args.p,
                        q=args.q)

    # Embed
    model = node2vec.fit(window=args.window_size,
                         min_count=args.min_count,
                         batch_words=args.batch_words,
                         seed=args.random_seed,
                         iter=args.iter)
    # Any keywords acceptable by gensim.Word2Vec can be passed; `dimensions` and `workers`
    # are automatically passed (from the Node2Vec constructor)

    exec_time = time.time() - start
    print("exec time {}".format(exec_time))

    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
    # Save embeddings for later use
    model.wv.save_word2vec_format(args.out_dir + "/" + args.out_file +
                                  args.out_name + ".emb")

    # Save model for later use
    model.save(args.out_dir + "/" + args.out_file + args.out_name + ".model")

    out_dict = args.__dict__
    out_dict["exec_time"] = exec_time

    dict2csv(args=out_dict,
             csv_name=args.out_dir + "/" + args.log_file + args.out_name)
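The fragment above begins midway through the Node2Vec constructor call. For context, here is a minimal end-to-end sketch of the same pipeline using the `node2vec` package; the graph, parameter values, and output paths are illustrative, not taken from the original script.

import networkx as nx
from node2vec import Node2Vec

G = nx.fast_gnp_random_graph(n=100, p=0.05, seed=0)

# Precompute biased random walks; `dimensions` and `workers` are forwarded
# automatically to gensim.Word2Vec by node2vec.fit().
node2vec = Node2Vec(G, dimensions=64, walk_length=30, num_walks=200,
                    workers=2, p=1.0, q=1.0)

# Train the skip-gram model; any gensim.Word2Vec keyword can be passed here.
model = node2vec.fit(window=10, min_count=1, batch_words=4)

model.wv.save_word2vec_format('graph.emb')  # node embeddings only
model.save('graph.model')                   # full gensim model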
Example #5

    start = time.time()
    seed_set = heuristic()
    stop = time.time()
    exec_time = stop - start

    # save the spread value of the result
    spread = monte_carlo(G, seed_set, args.p, args.no_simulations, args.model,
                         prng)

    print("Seed set: {}, spread: {} \nExec time: {}".format(
        seed_set, spread, exec_time))

    # save result
    if args.out_dir is None:
        out_dir = "."
    else:
        out_dir = args.out_dir
        import os

        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    log_file = out_dir + "/" + "log" + args.out_name

    out_dict = args.__dict__
    out_dict["exec_time"] = exec_time
    out_dict["influence_spread"] = spread
    out_dict["seed_set"] = seed_set

    dict2csv(args=out_dict, csv_name=log_file)
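monte_carlo() above estimates the influence spread of the seed set returned by the heuristic, but its implementation is not part of this listing. A hypothetical sketch matching the call signature used above, assuming the independent cascade model, is shown below; only the 'IC' case is handled and the model argument is otherwise ignored.

import random


def monte_carlo(G, seed_set, p, no_simulations, model='IC', prng=random):
    """Average number of nodes activated from seed_set over repeated
    independent cascade simulations: each newly activated node activates
    each of its inactive neighbours with probability p."""
    total = 0
    for _ in range(no_simulations):
        active = set(seed_set)
        frontier = list(seed_set)
        while frontier:
            next_frontier = []
            for u in frontier:
                for v in G.neighbors(u):
                    if v not in active and prng.random() < p:
                        active.add(v)
                        next_frontier.append(v)
            frontier = next_frontier
        total += len(active)
    return total / no_simulations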
Example #6
def eval_q(test_q, models, done_training, args):
    """

    Evaluation process.

    Args:
        test_q: A shared queue used to communicate with the learner process.
        models: Models to evaluate.
        done_training: A shared variable. Set to one when the learner finishes
            its job.
        args: Command-line arguments.

    Returns:
        None
    """

    plot = {
        'steps': [],
        'left_rewards': [],
        'right_rewards': [],
        'rewards': [],
        'scores': [],
        'final_reward': [],
        'abs_reward': [],
        'final_score': [],
        'abs_score': []
    }
    best_eval_score_mean = -100000000
    eval_count = 0
    env = football_env.create_environment(
        env_name=args.env_name,
        stacked=('stacked' in args.state),
        rewards=args.reward_experiment,
        logdir=os.path.join(args.log_dir, args.exp_name, 'trace_video'),
        render=False,
        dump_frequency=1,
        representation=args.representation,
        number_of_left_players_agent_controls=args.num_left_agents,
        write_full_episode_dumps=True,
        write_video=True,
        write_goal_dumps=True,
        other_config_options={'game_engine_random_seed': args.seed + 10})
    local_models = copy.deepcopy(models)
    for agent_idx in range(args.num_agents):
        stat_dict = models[agent_idx].state_dict()
        local_models[agent_idx].load_state_dict(stat_dict)

    if args.num_agents == 1:
        from a2c_ppo_acktr.envs import ObsUnsqueezeWrapper
        env = ObsUnsqueezeWrapper(env)
    env = EpisodeRewardScoreWrapper(
        env,
        number_of_left_players_agent_controls=args.num_left_agents,
        number_of_right_players_agent_controls=args.num_right_agents)
    while True:
        if not test_q.empty():
            print('INFO: Start to evaluate')
            test_q.get()
            for agent_idx in range(args.num_agents):
                stat_dict = models[agent_idx].state_dict()
                local_models[agent_idx].load_state_dict(stat_dict)
            eval_rewards, eval_left_rewards, eval_right_rewards = [], [], []
            eval_scores = []
            eval_count += 1
            with temp_seed(args.seed):
                for n_eval in range(args.num_eval_runs):
                    print('INFO: Eval # ', n_eval)
                    obs = env.reset()
                    obs = torch.from_numpy(obs).float()
                    while True:
                        actions = np.zeros(args.num_agents, dtype=int)
                        for agent_idx in range(args.num_agents):
                            with torch.no_grad():
                                kargs = obs[agent_idx:agent_idx +
                                            1], None, None
                                _, _, _, action_logit = local_models[
                                    agent_idx].act(*kargs)
                            dist = Categorical(logits=action_logit)
                            action = dist.sample()
                            actions[agent_idx] = int(action.item())
                        obs, reward, done, infos = env.step(
                            actions.reshape(-1))
                        obs = torch.from_numpy(obs).float()
                        if done:
                            eval_left_rewards.append(
                                np.sum(infos['episode_reward']
                                       [:args.num_left_agents]))
                            if args.num_right_agents > 0:
                                eval_right_rewards.append(
                                    np.sum(infos['episode_reward']
                                           [args.num_left_agents:]))
                            eval_scores.append(infos['episode_score'])
                            break
                if np.mean(eval_scores) > best_eval_score_mean:
                    best_eval_left_reward_mean, best_eval_left_reward_std = np.mean(
                        eval_left_rewards), np.std(eval_left_rewards)
                    best_eval_score_mean, best_eval_score_std = np.mean(
                        eval_scores), np.std(eval_scores)
                plot['steps'].append((eval_count - 1) * args.eval_every_step)
                plot['left_rewards'].append(np.mean(eval_left_rewards))
                if eval_right_rewards:
                    plot['right_rewards'].append(np.mean(eval_right_rewards))
                plot['scores'].append(np.mean(eval_scores))
                plot['final_reward'].append(np.mean(
                    plot['left_rewards'][-10:]))
                plot['final_score'].append(np.mean(plot['scores'][-10:]))
                plot['abs_score'].append(best_eval_score_mean)
                print("------------Eval Summary------------\n"
                      "Total num env steps: {}, {} eval runs\n"
                      "score avg/std        {:.6f}/{:.6f}\n"
                      "final reward avg/std {:.6f}/{:.6f}\n"
                      "final score avg/std  {:.6f}/{:.6f}\n"
                      "best reward avg/std  {:.6f}/{:.6f}\n"
                      "best score avg/std   {:.6f}/{:.6f}\n"
                      "------------------------------------\n".format(
                          plot['steps'][-1], args.num_eval_runs,
                          np.mean(eval_scores), np.std(eval_scores),
                          np.mean(eval_left_rewards),
                          np.std(eval_left_rewards),
                          np.mean(plot['scores'][-10:]),
                          np.std(plot['scores'][-10:]),
                          best_eval_left_reward_mean,
                          best_eval_left_reward_std, best_eval_score_mean,
                          best_eval_score_std))
                curve_file_path = os.path.join(args.log_dir, args.exp_name,
                                               'train_curve.csv')
                dict2csv(plot, curve_file_path)
                print('INFO: Wrote training curve to ', curve_file_path)
                sys.stdout.flush()

        if done_training.value and test_q.empty():
            print('Finish Evaluation. Exit eval_q()')
            break
    print('Done Evaluation')
    env.close()
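temp_seed() is used in the evaluation loops of Examples #2, #3, and #6 but is not included in this listing. A plausible minimal sketch, assuming it only needs to control NumPy's global RNG, is a context manager that seeds the generator for the evaluation rollouts and restores the previous state afterwards, so evaluation does not perturb the training process's random stream.

import contextlib
import numpy as np


@contextlib.contextmanager
def temp_seed(seed):
    state = np.random.get_state()   # remember the current RNG state
    np.random.seed(seed)            # deterministic seed for the eval block
    try:
        yield
    finally:
        np.random.set_state(state)  # restore the original stream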
Example #7
    print(mc_mcmh_corr)
    print(mc_th_corr)
    print(std)
    mc_min = df["monte_carlo"].min()
    mc_max = df["monte_carlo"].max()

    # write log file
    log_data = args.__dict__
    log_data["mc_th_corr"] = mc_th_corr
    log_data["mc_mcmh_corr"] = mc_mcmh_corr
    log_data["mc_std"] = std
    log_data["mc_min"] = mc_min
    log_data["mc_max"] = mc_max
    log_data["exec_time_mc_mean"] = comp_time_mc.mean()
    log_data["exec_time_mc_std"] = comp_time_mc.std()
    log_data["exec_time_mh_mean"] = comp_time_mh.mean()
    log_data["exec_time_mh_std"] = comp_time_mh.std()
    log_data["exec_time_th_mean"] = comp_time_th.mean()
    log_data["exec_time_th_std"] = comp_time_th.std()

    # write average degree and standard deviation, max, min
    degrees = np.array(list(dict(G.degree()).values()))
    log_data["degree_mean"] = degrees.mean()
    log_data["degree_std"] = degrees.std()
    log_data["degree_min"] = degrees.min()
    log_data["degree_max"] = degrees.max()
    log_data["degree_median"] = np.median(degrees)

    logfile_name = args.out_dir + "/" + "log.csv"
    dict2csv(log_data, logfile_name)