Example #1
0
    def set_login_verify(self):
        """Handle a captcha challenge encountered during login.

        Tries up to 3 times to locate the captcha iframe, solve the image via
        get_qq_captcha_code() and submit the answer.  When only one attempt
        remains after a failure, falls back to restarting the whole login
        via set_login().
        """
        attempts = 3
        while attempts:
            try:
                time.sleep(0.5)
                log.info("get captcha_img user: {}, index: {}".format(self.username, attempts))
                # the captcha is rendered inside a dedicated iframe
                newVcodeIframe = self.driver.find_element_by_xpath('''//div[@id="newVcodeIframe"]/iframe[1]''')
                self.driver.switch_to.frame(newVcodeIframe)

                captcha_img = self.set_login_save_img('capImg')
                rs, verify_code = get_qq_captcha_code(captcha_img)
                log.info(
                    'login user: {} captcha_img: {}, verifycode: {}'.format(self.username, captcha_img, verify_code))
                if not rs:
                    log.error('login user: {}, verify img fail'.format(self.username))
                    attempts -= 1
                    continue

                ele_verifycode = self.driver.find_element_by_id("capAns")
                ele_verifycode.send_keys(verify_code)
                self.driver.find_element_by_id("submit").click()
                # Fix: stop once the answer has been submitted.  Previously the
                # loop kept spinning with the same counter and only exited when
                # a later iteration threw (the iframe being gone), burning retries.
                break
            # Fix: was `except BaseException`, which also swallowed
            # KeyboardInterrupt / SystemExit.
            except Exception as e:
                log.error('user: %s, verifycode err, msg: %s' % (self.username, e))
                attempts -= 1
                if attempts == 1:
                    log.info("verify_login user: {}, retry login...".format(self.username))
                    self.set_login()
    def _ensure_initialized(self):
        """Spawn and initialize one MultiAgentEnvWorker per agent (idempotent).

        Retries up to `num_attempts` times: each attempt picks a free UDP
        port, sends an INIT task to every worker and waits for each reply.
        On failure the workers are killed and a fresh set is created.

        Raises:
            RuntimeError: if a worker is a thread stuck on initialization
                (threads cannot be killed), or if all attempts fail.
        """
        if self.initialized:
            return

        num_attempts = 25
        attempt = 0
        for attempt in range(num_attempts):
            self.workers = [
                MultiAgentEnvWorker(i, self.make_env_func, self.env_config)
                for i in range(self.num_agents)
            ]

            try:
                port_to_use = udp_port_num(self.env_config)
                port = find_available_port(port_to_use, increment=1000)
                log.debug('Using port %d', port)
                init_info = dict(port=port)

                for worker in self.workers:
                    worker.task_queue.put((init_info, TaskType.INIT))
                    # stagger worker startup; safe_init waits much longer just in case
                    time.sleep(1.0 if self.safe_init else 0.01)

                for worker in self.workers:
                    # wait for each worker to confirm initialization
                    worker.result_queue.get(timeout=5)
                    worker.result_queue.task_done()
                    worker.task_queue.join()
            except Exception as exc:
                for worker in self.workers:
                    if isinstance(worker.process, threading.Thread):
                        log.info(
                            'We cannot really kill a thread, so let the whole process die'
                        )
                        raise RuntimeError(
                            'Critical error: worker stuck on initialization. Abort!'
                        )
                    else:
                        log.info('Killing process %r', worker.process.pid)
                        kill(worker.process.pid)
                del self.workers
                log.warning('Could not initialize env, try again! Error: %r',
                            exc)
                time.sleep(1)
            else:
                break
        else:
            # for/else: no `break` means every attempt failed.  (The original
            # `attempt >= num_attempts` check after the loop was dead code,
            # since range() never yields a value >= num_attempts, and the code
            # then crashed on the deleted self.workers.)
            log.error('Could not initialize env even after %d attempts. Fail!',
                      num_attempts)
            raise RuntimeError(
                'Critical error: worker stuck on initialization, num attempts exceeded. Abort!'
            )

        log.debug('%d agent workers initialized for env %d!',
                  len(self.workers), self.env_config.worker_index)
        log.debug('Took %d attempts!\n', attempt + 1)
        self.initialized = True
Example #3
0
 def wrapper(*args, **kwargs):
     """Call `func`, retrying on `exception_class` up to `num_attempts` times."""
     final_attempt = num_attempts - 1
     for attempt in range(num_attempts):
         try:
             return func(*args, **kwargs)
         except exception_class as e:
             if attempt == final_attempt:
                 # out of retries - let the exception propagate
                 raise
             log.error('Failed with error %r, trying again', e)
             sleep(sleep_time)
    def _write_dict_summaries(dictionary, writer, name, env_steps):
        """Recursively dump a (possibly nested) dict as tensorboard scalars under zz_pbt/."""
        for _, key, value in iterate_recursively(dictionary):
            if isinstance(value, bool):
                # scalar summaries must be numeric
                value = int(value)

            if isinstance(value, (tuple, list)):
                # one scalar per element, suffixed with its index
                for idx, element in enumerate(value):
                    writer.add_scalar(f'zz_pbt/{name}_{key}_{idx}', element, env_steps)
            elif isinstance(value, (int, float)):
                writer.add_scalar(f'zz_pbt/{name}_{key}', value, env_steps)
            else:
                log.error('Unsupported type in pbt summaries %r', type(value))
    def _set_env_attr(self, env, player_id, attr_chain, value):
        """Allows us to set an arbitrary attribute of the environment, e.g. attr_chain can be unwrapped.foo.bar"""
        assert player_id == self.player_id

        attrs = attr_chain.split('.')
        curr_attr = env
        try:
            # walk every component except the last one
            for attr_name in attrs[:-1]:
                curr_attr = getattr(curr_attr, attr_name)
        except AttributeError:
            log.error('Env does not have an attribute %s', attr_chain)
            # Fix: bail out instead of falling through and setting the
            # attribute on whatever intermediate object we managed to reach.
            return

        attr_to_set = attrs[-1]
        setattr(curr_attr, attr_to_set, value)
    def _selected_weapon_rewards(self, selected_weapon, selected_weapon_ammo, deltas):
        """Reward for keeping a loaded weapon selected (unholstered) long enough.

        Appends (weapon_key, reward) to `deltas`, accumulates the reward in
        self.reward_structure, and returns the reward value.
        """
        # we must keep the weapon ready for a certain number of frames to get rewards
        unholstered = len(self.selected_weapon) > 4 and all(
            sw == selected_weapon for sw in self.selected_weapon)
        reward = 0.0

        if selected_weapon_ammo > 0 and unholstered:
            try:
                # Fix: the original referenced an undefined name `weapon` in the
                # f-string, which raised NameError (not caught by `except KeyError`).
                reward = self.reward_shaping_scheme['selected_weapon'][f'SELECTED{selected_weapon}']
            except KeyError:
                log.error('%r', self.reward_shaping_scheme)
                log.error('%r', selected_weapon)
            weapon_key = f'weapon{selected_weapon}'
            deltas.append((weapon_key, reward))
            self.reward_structure[weapon_key] = self.reward_structure.get(weapon_key, 0.0) + reward

        return reward
Example #7
0
    def reset(self):
        """Start a new episode and return the first observation as an HWC image."""
        self._ensure_initialized()

        recording_demo = self.record_to is not None and not self.is_multiplayer
        if recording_demo:
            # demo recording does not work in multiplayer (uses different mechanism)
            if not os.path.exists(self.record_to):
                os.makedirs(self.record_to)
            demo_path = self.demo_path(self._num_episodes)
            log.warning('Recording episode demo to %s', demo_path)
            self.game.new_episode(demo_path)
        elif self._num_episodes > 0:
            # no demo recording (default)
            self.game.new_episode()

        self.state = self.game.get_state()
        try:
            img = self.state.screen_buffer
        except AttributeError:
            # sometimes Doom does not return screen buffer at all??? Rare bug
            img = None

        if img is None:
            log.error(
                'Game returned None screen buffer! This is not supposed to happen!'
            )
            img = self._black_screen()

        # Swap current and previous histograms; the new "current" starts zeroed.
        if self.current_histogram is not None and self.previous_histogram is not None:
            self.current_histogram, self.previous_histogram = \
                self.previous_histogram, self.current_histogram
            self.current_histogram.fill(0)

        self._actions_flattened = None
        self._last_episode_info = copy.deepcopy(self._prev_info)
        self._prev_info = None

        self._num_episodes += 1

        # convert CHW screen buffer to HWC
        return np.transpose(img, (1, 2, 0))
        def wrapper(*args, **kwargs):
            """Call `func`, resetting the multiagent env wrapper after each failure."""
            final_attempt = num_attempts - 1
            for attempt in range(num_attempts):
                try:
                    return func(*args, **kwargs)
                except exception_class as e:
                    # args[0] is the wrapper instance (the bound `self`)
                    env_wrapper = args[0]
                    env_wrapper.initialized = False
                    env_wrapper.close()

                    # when wrapping step() we must also reset to get back to a valid state
                    if should_reset:
                        env_wrapper.reset()

                    if attempt == final_attempt:
                        raise
                    log.error('Failed with error %r, trying again', e)
                    sleep(sleep_time)
Example #9
0
    def set_driver(self):
        """Create and configure the Firefox webdriver, then open the login page.

        Raises:
            LoginError: if the driver cannot be set up for any reason.
        """
        try:
            if self.platform == "linux":
                # headless Linux needs a virtual display
                self.display = Display(visible=0, size=(800, 600))
                self.display.start()

            self.driver = webdriver.Firefox(executable_path=self.geckopath, firefox_profile=self.set_profile())
            self.driver.delete_all_cookies()

            # keep pages from loading forever
            self.driver.set_page_load_timeout(300)
            self.driver.implicitly_wait(10)
            self.wait = WebDriverWait(self.driver, 30)

            # navigate to the initial login page
            self.driver.get(self.LOGIN_URL)
        except BaseException:
            self.quit()
            log.error(traceback.format_exc())
            raise LoginError("WebDriverException, can not set driver...")
Example #10
0
def read_seeds_file(filename, has_keys):
    """Read integer seeds from a text file, one per line.

    If has_keys is True, each line must be exactly '<seed> <cache_key>';
    otherwise the whole line is the seed.  Malformed lines are logged and
    skipped.
    """
    seeds = []

    with open(filename, 'r') as seed_file:
        for line in seed_file:
            try:
                if has_keys:
                    seed_token, _cache_key = line.split(' ')
                else:
                    seed_token = line

                seeds.append(int(seed_token))
            except Exception:
                log.error(
                    'Could not read seed value from the file! File potentially corrupted'
                )
                log.exception('Exception when reading seeds file')

    return seeds
Example #11
0
    def check(self, addrs):
        """Validate e-mail addresses by typing them into the webmail compose window.

        The mail UI marks invalid recipients with the `addr_error` style; those
        are collected and returned as a list of address strings.  A known-bad
        sentinel address is appended so we can tell when validation finished.
        Up to 3 attempts; returns None (and clears self.is_login) if all fail.
        """
        res = None
        index = 3
        while index:
            try:
                # per-attempt recovery: refresh the page on the 2nd try, pause on the 3rd
                if index == 2: self.refresh()
                if index == 1: time.sleep(5)
                # jump out of any iframe back to the top-level document
                self.driver.switch_to.default_content()

                # click the "compose" button
                # self.wait.until(EC.presence_of_element_located((By.ID, 'composebtn')))
                elem_but_w = self.driver.find_element_by_id("composebtn")
                elem_but_w.click()

                # switch to the main content iframe on the right
                main_Frame1 = self.driver.find_element_by_id("mainFrame")
                self.driver.switch_to.frame(main_Frame1)

                # type the addresses to check into the recipients box,
                # appending the sentinel address last
                check_addrs = "{};[email protected];".format(addrs) if addrs else "[email protected];"
                self.driver.find_element_by_xpath('''//div[@id="toAreaCtrl"]/div[2]/input''').send_keys(check_addrs)

                # poll (up to 30 * 0.5s) until the sentinel appears at the end
                # of the rejected-address list, i.e. validation has completed
                count = 30
                while count:
                    _t = self.driver.find_element_by_xpath('''//div[@id="toAreaCtrl"]''')
                    errors = _t.find_elements_by_css_selector("div.addr_base.addr_error")
                    res = [e.text.strip().replace(";", "") for e in errors]
                    if res and res[-1] == '*****@*****.**':
                        break
                    count -= 1
                    time.sleep(0.5)
                index = 0  # success - exit the retry loop
            except BaseException as e:
                log.error('user: %s, check err, msg: %s' % (self.username, e))
                log.error(traceback.format_exc())
                index -= 1
        if res is None:
            # all attempts failed - assume the session is no longer logged in
            self.is_login = False
        return res
    def _ensure_paths_to_goal_calculated(self, maps, goals):
        """Ensure self.paths[env_i] contains next-hop pointers towards each env's goal.

        For every environment that has both a map and a goal, computes the
        shortest path from the env's current landmark and stores, for each
        node on that path, the next node to visit.  Entries that were already
        filled in by a previous call are reused and not overwritten.
        """
        for env_i in range(self.params.num_envs):
            m = maps[env_i]
            goal = goals[env_i]
            if m is None or goal is None:
                continue

            curr_landmark = self.current_landmarks[env_i]
            if self.paths[env_i][curr_landmark] is not None:
                # shortest path for this environment is already calculated
                continue

            path = m.get_path(curr_landmark,
                              goal,
                              edge_weight=self.edge_weight)

            if path is None or len(path) <= 0:
                # dump debugging info before the assert below fires
                log.error('Nodes: %r', list(m.graph.nodes))
                log.error('Path %r', path)
                log.error('Current landmark: %d', curr_landmark)
                log.error('Goal: %d', goal)

            assert path is not None and len(path) > 0

            # record next-hop pointers along the path, stopping as soon as we
            # hit a node whose continuation is already known
            curr_node = curr_landmark
            assert path[0] == curr_node
            for next_node in path[1:]:
                if self.paths[env_i][curr_node] is not None:
                    # next target for the rest of the path is already known
                    break

                self.paths[env_i][curr_node] = next_node
                curr_node = next_node

            assert path[-1] == goal
            self.paths[env_i][
                goal] = goal  # once we're already there let the path be trivial
Example #13
0
def evaluate_locomotion_agent(agent, multi_env):
    """Evaluate how well the agent's locomotion policy follows its landmark map.

    Each parallel environment gets a random goal node sampled from the agent's
    dense persistent map; the navigator issues intermediate waypoints until the
    goal is reached, the agent gets lost, or a 5000-frame timeout expires.

    Returns:
        (success, avg_speed): per-env success flags and path_length/frames
        speed (stays -1 for envs that never reached their goal).
    """
    num_envs = multi_env.num_envs

    observations = main_observation(multi_env.reset())
    obs_prev = observations
    infos = multi_env.info()

    agent.tmax_mgr.initialize(observations, infos, 1)
    m = agent.tmax_mgr.dense_persistent_maps[-1]

    navigator = Navigator(agent)
    for env_i in range(num_envs):
        navigator.reset(env_i, m)

    # sample final goals; node 0 is excluded from the candidates
    all_targets = list(m.graph.nodes)
    if len(all_targets) > 0:
        all_targets.remove(0)

    final_goal_idx = random.sample(all_targets, num_envs)
    log.info('Goals: %r', final_goal_idx)

    # noinspection PyProtectedMember
    navigator._ensure_paths_to_goal_calculated([m] * num_envs, final_goal_idx)
    # count hops from node 0 to each goal by walking the next-hop table;
    # used below to compute average speed
    path_lengths = [0] * num_envs
    for env_i in range(num_envs):
        location, path_length = 0, 0
        while location != final_goal_idx[env_i]:
            location = navigator.paths[env_i][location]
            path_length += 1
        path_lengths[env_i] = path_length

    frames = 0
    next_target, next_target_d = navigator.get_next_target(
        [m] * num_envs, observations, final_goal_idx, [frames] * num_envs,
    )
    next_target_obs = [m.get_observation(t) for t in next_target]

    avg_speed = [-1] * num_envs
    success = [False] * num_envs

    t = Timing()
    while True:
        with t.timeit('frame'):
            with t.timeit('policy'):
                actions = policy_step(agent, obs_prev, observations, next_target_obs, final_goal_idx)

            with t.timeit('step'):
                env_obs, rew, done, info = multi_env.step(actions)

            obs_prev = observations
            observations = main_observation(env_obs)

            with t.timeit('navigator'):
                next_target, next_target_d = navigator.get_next_target(
                    [m] * num_envs, observations, final_goal_idx, [frames] * num_envs,
                )

            for env_i in range(num_envs):
                # a goal of None marks an env that is already finished (success or lost)
                if final_goal_idx[env_i] is None:
                    continue

                if next_target[env_i] is None:
                    log.warning(
                        'Agent %d got lost in %d steps trying to reach %d', env_i, frames, final_goal_idx[env_i],
                    )
                    final_goal_idx[env_i] = None
                else:
                    # success: current waypoint is the goal and we are close enough to it
                    if next_target[env_i] == final_goal_idx[env_i] and next_target_d[env_i] < 0.1:
                        success[env_i] = True
                        avg_speed[env_i] = path_lengths[env_i] / (frames + 1)
                        log.debug(
                            'Agent %d reached goal %d in %d steps, avg. speed %.3f',
                            env_i, final_goal_idx[env_i], frames, avg_speed[env_i],
                        )
                        final_goal_idx[env_i] = None

                    next_target_obs[env_i] = m.get_observation(next_target[env_i])

            frames += 1
            if frames > 5000:
                log.error('Timeout! 5000 frames was not enough to finish locomotion!')
                break

        finished = [g is None for g in final_goal_idx]
        if all(finished):
            log.info('Done!')
            break
        else:
            if frames % 10 == 0:
                # frame_repeat presumably matches the env's action repeat - TODO confirm
                frame_repeat = 4
                fps = (1.0 / t.frame) * frame_repeat * num_envs
                log.info('%d agents remaining, fps %.3f, time %s', num_envs - sum(finished), fps, t)

    return success, avg_speed
Example #14
0
    def _run(self):
        """Main loop of the learner process.

        Consumes tasks from the task queue (TRAIN / INIT / TERMINATE / PBT),
        throttles the policy workers when too much experience accumulates,
        and trains either in a background thread or inline (testing only).
        """
        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        try:
            psutil.Process().nice(self.cfg.default_niceness)
        except psutil.AccessDenied:
            log.error('Low niceness requires sudo!')

        if self.cfg.device == 'gpu':
            cuda_envvars(self.policy_id)

        torch.multiprocessing.set_sharing_strategy('file_system')
        torch.set_num_threads(self.cfg.learner_main_loop_num_cores)

        timing = Timing()

        rollouts = []

        if self.train_in_background:
            self.training_thread.start()
        else:
            self.initialize(timing)
            log.error(
                'train_in_background set to False on learner %d! This is slow, use only for testing!',
                self.policy_id,
            )

        while not self.terminate:
            # drain the task queue without blocking for long
            while True:
                try:
                    tasks = self.task_queue.get_many(timeout=0.005)

                    for task_type, data in tasks:
                        if task_type == TaskType.TRAIN:
                            with timing.add_time('extract'):
                                rollouts.extend(self._extract_rollouts(data))
                                # log.debug('Learner %d has %d rollouts', self.policy_id, len(rollouts))
                        elif task_type == TaskType.INIT:
                            self._init()
                        elif task_type == TaskType.TERMINATE:
                            time.sleep(0.3)
                            log.info('GPU learner timing: %s', timing)
                            self._terminate()
                            break
                        elif task_type == TaskType.PBT:
                            self._process_pbt_task(data)
                except Empty:
                    break

            if self._accumulated_too_much_experience(rollouts):
                # if we accumulated too much experience, signal the policy workers to stop experience collection
                if not self.stop_experience_collection[self.policy_id]:
                    log.debug(
                        'Learner %d accumulated too much experience, stop experience collection!',
                        self.policy_id)
                self.stop_experience_collection[self.policy_id] = True
            elif self.stop_experience_collection[self.policy_id]:
                # otherwise, resume the experience collection if it was stopped
                self.stop_experience_collection[self.policy_id] = False
                with self.resume_experience_collection_cv:
                    log.debug('Learner %d is resuming experience collection!',
                              self.policy_id)
                    self.resume_experience_collection_cv.notify_all()

            with torch.no_grad():
                rollouts = self._process_rollouts(rollouts, timing)

            if not self.train_in_background:
                # inline training path: process whatever the rollout step produced
                while not self.experience_buffer_queue.empty():
                    training_data = self.experience_buffer_queue.get()
                    self._process_training_data(training_data, timing)

            self._experience_collection_rate_stats()

        if self.train_in_background:
            # unblock the background training thread with a sentinel, then wait for it
            self.experience_buffer_queue.put(None)
            self.training_thread.join()
Example #15
0
    def sample(self, proc_idx):
        """Worker-process entry point: step a batch of envs with random actions.

        Creates cfg.num_envs_per_worker environments, resets them, then steps
        them with randomly sampled actions until the shared terminate flag is
        set or the per-worker frame budget is exhausted, periodically sending
        frame counts to self.report_queue.
        """
        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        timing = Timing()

        from threadpoolctl import threadpool_limits
        with threadpool_limits(limits=1, user_api=None):
            if self.cfg.set_workers_cpu_affinity:
                set_process_cpu_affinity(proc_idx, self.cfg.num_workers)

            # remember the affinity so we can verify it was not altered (see below);
            # cpu_affinity() is unavailable on macOS
            initial_cpu_affinity = psutil.Process().cpu_affinity(
            ) if platform != 'darwin' else None
            psutil.Process().nice(10)

            with timing.timeit('env_init'):
                envs = []
                env_key = ['env' for _ in range(self.cfg.num_envs_per_worker)]

                for env_idx in range(self.cfg.num_envs_per_worker):
                    global_env_id = proc_idx * self.cfg.num_envs_per_worker + env_idx
                    env_config = AttrDict(worker_index=proc_idx,
                                          vector_index=env_idx,
                                          env_id=global_env_id)
                    env = create_env(self.cfg.env,
                                     cfg=self.cfg,
                                     env_config=env_config)
                    log.debug(
                        'CPU affinity after create_env: %r',
                        psutil.Process().cpu_affinity()
                        if platform != 'darwin' else 'MacOS - None')
                    env.seed(global_env_id)
                    envs.append(env)

                    # this is to track the performance for individual DMLab levels
                    if hasattr(env.unwrapped, 'level_name'):
                        env_key[env_idx] = env.unwrapped.level_name

                episode_length = [0 for _ in envs]
                episode_lengths = [deque([], maxlen=20) for _ in envs]

            try:
                with timing.timeit('first_reset'):
                    for env_idx, env in enumerate(envs):
                        env.reset()
                        log.info('Process %d finished resetting %d/%d envs',
                                 proc_idx, env_idx + 1, len(envs))

                    self.report_queue.put(
                        dict(proc_idx=proc_idx, finished_reset=True))

                # wait for the parent to tell all workers to start sampling together
                self.start_event.wait()

                with timing.timeit('work'):
                    last_report = last_report_frames = total_env_frames = 0
                    while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                        for env_idx, env in enumerate(envs):
                            action = env.action_space.sample()
                            with timing.add_time(f'{env_key[env_idx]}.step'):
                                obs, reward, done, info = env.step(action)

                            num_frames = info.get('num_frames', 1)
                            total_env_frames += num_frames
                            episode_length[env_idx] += num_frames

                            if done:
                                with timing.add_time(
                                        f'{env_key[env_idx]}.reset'):
                                    env.reset()

                                episode_lengths[env_idx].append(
                                    episode_length[env_idx])
                                episode_length[env_idx] = 0

                        with timing.add_time('report'):
                            now = time.time()
                            if now - last_report > self.report_every_sec:
                                last_report = now
                                frames_since_last_report = total_env_frames - last_report_frames
                                last_report_frames = total_env_frames
                                self.report_queue.put(
                                    dict(proc_idx=proc_idx,
                                         env_frames=frames_since_last_report))

                # Extra check to make sure cpu affinity is preserved throughout the execution.
                # I observed weird effect when some environments tried to alter affinity of the current process, leading
                # to decreased performance.
                # This can be caused by some interactions between deep learning libs, OpenCV, MKL, OpenMP, etc.
                # At least user should know about it if this is happening.
                cpu_affinity = psutil.Process().cpu_affinity(
                ) if platform != 'darwin' else None
                assert initial_cpu_affinity == cpu_affinity, \
                    f'Worker CPU affinity was changed from {initial_cpu_affinity} to {cpu_affinity}!' \
                    f'This can significantly affect performance!'

            # NOTE(review): bare except catches everything (incl. SystemExit) so the
            # crash is always reported to the parent - consider `except Exception`
            except:
                log.exception('Unknown exception')
                log.error('Unknown exception in worker %d, terminating...',
                          proc_idx)
                self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

            # stagger the final log lines so worker output doesn't interleave
            time.sleep(proc_idx * 0.01 + 0.01)
            log.info('Process %d finished sampling. Timing: %s', proc_idx,
                     timing)

            for env_idx, env in enumerate(envs):
                if len(episode_lengths[env_idx]) > 0:
                    log.warning('Level %s avg episode len %d',
                                env_key[env_idx],
                                np.mean(episode_lengths[env_idx]))

            for env in envs:
                env.close()
Example #16
0
    def run(self):
        """Main loop of the sampling benchmark.

        Starts the worker processes, waits for all of them to finish env
        resets, then aggregates their frame-count reports until the target
        number of frames is collected, a worker crashes or stops responding,
        or the user interrupts with Ctrl+C.
        """
        for p in self.processes:
            p.start()

        # wait until every worker reports that its envs finished resetting
        # (fix: np.bool was removed in NumPy 1.24 - use the builtin bool dtype)
        finished_reset = np.zeros([self.cfg.num_workers], dtype=bool)
        while not all(finished_reset):
            try:
                msg = self.report_queue.get(timeout=0.1)
                if 'finished_reset' in msg:
                    finished_reset[msg['proc_idx']] = True
                    log.debug('Process %d finished reset! Status %r',
                              msg['proc_idx'], finished_reset)
            except Empty:
                pass

        log.debug('All workers finished reset!')
        time.sleep(3)
        # release all workers to start sampling simultaneously
        self.start_event.set()

        start = time.time()
        env_frames = 0
        last_process_report = [time.time() for _ in self.processes]

        while not self.terminate.value:
            try:
                try:
                    msgs = self.report_queue.get_many(
                        timeout=self.report_every_sec * 1.5)
                    for msg in msgs:
                        last_process_report[msg['proc_idx']] = time.time()

                        if 'crash' in msg:
                            self.terminate.value = True
                            log.error(
                                'Terminating due to process %d crashing...',
                                msg['proc_idx'])
                            break

                        env_frames += msg['env_frames']

                    if env_frames >= self.cfg.sample_env_frames:
                        self.terminate.value = True
                except Empty:
                    pass
            except KeyboardInterrupt:
                self.terminate.value = True
                log.error('KeyboardInterrupt in main loop! Terminating...')
                break

            if time.time() - self.last_report > self.report_every_sec:
                self.report(env_frames)

            for proc_idx, p in enumerate(self.processes):
                delay = time.time() - last_process_report[proc_idx]
                if delay > 600:
                    # killing the whole script is the best way to know that some of the processes froze
                    log.error(
                        'Process %d had not responded in %.1f s!!! Terminating...',
                        proc_idx, delay)
                    self.terminate.value = True

            for p in self.processes:
                if not p.is_alive():
                    self.terminate.value = True
                    log.error('Process %r died! terminating...', p)

        total_time = time.time() - start
        log.info('Collected %d frames in %.1f s, avg FPS: %.1f', env_frames,
                 total_time, env_frames / total_time)
        log.debug('Done sampling...')
Example #17
0
def run(run_description, args):
    """Launch all experiments from `run_description` as subprocesses.

    Schedules up to args.max_parallel concurrent experiments, optionally
    packing them onto GPUs (args.experiments_per_gpu) by setting
    CUDA_VISIBLE_DEVICES, and polls for completed processes until every
    experiment has run.

    Returns:
        0 on completion.
    """
    experiments = run_description.experiments
    train_dir = run_description.train_dir
    max_parallel = args.max_parallel

    log.info('Starting processes with base cmds: %r',
             [e.cmd for e in experiments])
    log.info('Max parallel processes is %d', max_parallel)
    log.info(
        'Monitor log files using\n\n\ttail -f train_dir/%s/**/**/log.txt\n\n',
        run_description.run_name)

    processes = []
    # gpu_id -> list of cmd strings currently running on that GPU
    processes_per_gpu = {g: [] for g in range(args.num_gpus)}

    experiments = run_description.generate_experiments()
    next_experiment = next(experiments, None)

    def find_least_busy_gpu():
        """Return (gpu_id, free process slots) for the GPU with the most free slots."""
        least_busy_gpu = None
        gpu_available_processes = 0

        for gpu_id in range(args.num_gpus):
            available_processes = args.experiments_per_gpu - len(
                processes_per_gpu[gpu_id])
            if available_processes > gpu_available_processes:
                gpu_available_processes = available_processes
                least_busy_gpu = gpu_id

        return least_busy_gpu, gpu_available_processes

    def can_squeeze_another_process():
        """True if both the global parallelism cap and per-GPU slots allow one more process."""
        if len(processes) >= max_parallel:
            return False

        if args.experiments_per_gpu > 0:
            least_busy_gpu, gpu_available_processes = find_least_busy_gpu()
            if gpu_available_processes <= 0:
                return False

        return True

    while len(processes) > 0 or next_experiment is not None:
        # start as many new experiments as the caps allow
        while can_squeeze_another_process() and next_experiment is not None:
            cmd, name, root_dir, exp_env_vars = next_experiment

            cmd_tokens = cmd.split(' ')

            # workaround to make sure we're running the correct python executable from our virtual env
            if cmd_tokens[0] == 'python':
                cmd_tokens[0] = sys.executable
                log.debug('Using Python executable %s', cmd_tokens[0])

            experiment_dir = ensure_dir_exists(join(train_dir, root_dir, name))
            logfile = open(join(experiment_dir, 'log.txt'), 'wb')
            envvars = os.environ.copy()

            best_gpu = None
            if args.experiments_per_gpu > 0:
                best_gpu, best_gpu_available_processes = find_least_busy_gpu()
                log.info(
                    'The least busy gpu is %d where we can run %d more processes',
                    best_gpu,
                    best_gpu_available_processes,
                )
                envvars['CUDA_VISIBLE_DEVICES'] = f'{best_gpu}'

            log.info('Starting process %r', cmd_tokens)

            if exp_env_vars is not None:
                for key, value in exp_env_vars.items():
                    log.info('Adding env variable %r %r', key, value)
                    envvars[str(key)] = str(value)

            process = subprocess.Popen(cmd_tokens,
                                       stdout=logfile,
                                       stderr=logfile,
                                       env=envvars)
            # stash bookkeeping info on the Popen object itself
            process.process_logfile = logfile
            process.gpu_id = best_gpu
            process.proc_cmd = cmd

            processes.append(process)

            if process.gpu_id is not None:
                processes_per_gpu[process.gpu_id].append(process.proc_cmd)

            log.info('Started process %s on GPU %r', process.proc_cmd,
                     process.gpu_id)
            log.info('Waiting for %d seconds before starting next process',
                     args.pause_between)
            time.sleep(args.pause_between)

            next_experiment = next(experiments, None)

        # reap finished processes, freeing their GPU slots
        remaining_processes = []
        for process in processes:
            if process.poll() is None:
                remaining_processes.append(process)
                continue
            else:
                if process.gpu_id is not None:
                    processes_per_gpu[process.gpu_id].remove(process.proc_cmd)
                process.process_logfile.close()
                log.info('Process %r finished with code %r', process.proc_cmd,
                         process.returncode)
                if process.returncode != 0:
                    log.error('WARNING: RETURN CODE IS %r', process.returncode)

        processes = remaining_processes
        time.sleep(0.1)

    log.info('Done!')

    return 0
def get_config():
    """Log in to the Ceph/Calamari REST API and gather cluster configuration.

    Builds the machine lists (admin/mon/osd), authenticates against the API,
    and reads the cluster id (fsid).

    :return: dict with keys ``auth`` (authenticated client), ``fsid``,
             ``admin_node``, ``monsL``, ``osdL``; on any failure a dict with
             only ``{"auth": None}``.
    """
    # Example endpoint: http://10.8.128.28/api/v2/ --user admin --pass admin123
    username = "******"
    password = "******"
    uri = "https://10.8.128.63:8002/api/v2/"

    # Discover the machines participating in the cluster.
    make_machines = MakeMachines()
    admin_node = make_machines.admin()
    monsL = make_machines.mon()
    osdL = make_machines.osd()

    try:
        # Login. The test rig uses a self-signed certificate, hence
        # verify=False on every request below.
        c = AuthenticatedHttpClient(uri, username, password)
        c.login()

        # Sanity-check the base API endpoint.
        response = c.request("GET", "", verify=False)
        response.raise_for_status()
        log.debug(json.dumps(response.json(), indent=2))

        # Cluster listing; the rig has a single cluster, take the first entry.
        cluster_response = c.request("GET", "cluster", verify=False)
        cluster_response.raise_for_status()
        pretty_cluster_details = json.dumps(cluster_response.json(), indent=2)
        pretty_cluster_json = json.loads(pretty_cluster_details)[0]

        # User listing (logged for debugging only).
        log.info("api users list")
        log.info("--------------------")
        user_response = c.request("GET", "user", verify=False)
        user_response.raise_for_status()
        log.debug(user_response.json())

        # Cluster details addressed by fsid.
        cluster_with_fsid_api = "cluster" + "/" + pretty_cluster_json["id"]
        log.debug("cluster with fsid %s:" % cluster_with_fsid_api)

        cluster_id_response = c.request("GET",
                                        cluster_with_fsid_api,
                                        verify=False)
        cluster_id_response.raise_for_status()
        log.debug("pretty cluster_id json data %s" % cluster_id_response.json())

        config_data = {
            "auth": c,
            "fsid": pretty_cluster_json["id"],
            "admin_node": admin_node,
            "monsL": monsL,
            "osdL": osdL,
        }

    # Bug fix: "except Exception, e" is Python 2 syntax and a SyntaxError in
    # Python 3 (this file already uses f-strings elsewhere).
    except Exception as e:
        log.error("error in auth")
        log.error(e)

        config_data = {"auth": None}

    # Bug fix: the original built config_data but never returned it,
    # so callers of get_config() always received None.
    return config_data
Example #19
0
    def send_email(self, addrs, subject, content, subtype="html"):
        """Send an email through the QQ webmail web UI via the Selenium driver.

        :param addrs: recipient address(es) typed into the "to" field
        :param subject: subject line
        :param content: message body typed into the rich-text editor
        :param subtype: unused; kept for backward compatibility of the signature
        """
        try:
            self.driver.switch_to.default_content()

            # Click the "compose" button.
            # self.wait.until(EC.presence_of_element_located((By.ID, 'composebtn')))
            elem_but_w = self.driver.find_element_by_id("composebtn")
            elem_but_w.click()

            # Switch to the main iframe on the right.
            main_Frame1 = self.driver.find_element_by_id("mainFrame")
            self.driver.switch_to.frame(main_Frame1)

            # Recipient field.
            self.driver.find_element_by_xpath('''//div[@id="toAreaCtrl"]/div[2]/input''').send_keys(addrs)
            # Subject field.
            self.driver.find_element_by_id('subject').send_keys(subject)

            # Message body: the editor area must be clicked before it accepts keys.
            o = self.driver.find_elements_by_class_name("qmEditorIfrmEditArea")
            o[0].click()  # !!!!!!!must click!!!!!!!
            o[0].send_keys(content)

            time.sleep(1)

            # Click the "send" button.
            self.driver.find_element_by_xpath("//*[@id='toolbar']/div/a[1]").click()

            time.sleep(3)
            # The "compose another" text appears only after a successful send.
            assert "再写一封" in self.driver.page_source

        # Narrowed from a bare "except:" (which also swallowed SystemExit /
        # KeyboardInterrupt); AssertionError from the check above is still caught.
        except Exception:
            # A captcha/verification dialog popped up (or the send flow broke):
            # log it, refresh the page and give up on this message.
            # Bug fix: removed a large captcha-handling block that followed this
            # "return" -- it was unreachable dead code.
            log.error("弹出验证框")
            self.refresh()
            return
Example #20
0
def test_locomotion(params, env_id):
    """Interactive demo of the TMAX locomotion policy driving toward a fixed landmark.

    Creates a single environment, loads (or builds) a topological map, then steps
    the env at roughly 10 FPS while querying the navigator for the next sub-goal
    each frame and visualizing the current/final targets in OpenCV windows.

    NOTE(review): relies on module-level names ``terminate``, ``pause`` and
    ``EPS`` defined outside this view -- presumably toggled by a keyboard/signal
    handler; confirm before reuse.

    :param params: agent/experiment parameters; mutated here (num_envs forced to 1)
    :param env_id: id of the environment to create
    :return: 0 on normal termination
    """
    def make_env_func():
        # Fixed seed so the demo run is reproducible.
        e = create_env(env_id, skip_frames=True)
        e.seed(0)
        return e

    # params = params.load()
    # params.ensure_serialized()

    # This demo drives exactly one environment instance.
    params.num_envs = 1
    # params.naive_locomotion = True

    agent = AgentTMAX(make_env_func, params)

    agent.initialize()

    env = make_env_func()

    env_obs, info = reset_with_info(env)
    # Keep previous and current observation; the locomotion net consumes both.
    obs_prev = obs = main_observation(env_obs)
    done = False

    # Either restore a persistent map from a checkpoint, or bootstrap a fresh
    # one from the first observation via the TMAX manager.
    if params.persistent_map_checkpoint is not None:
        loaded_persistent_map = TopologicalMap.create_empty()
        loaded_persistent_map.maybe_load_checkpoint(
            params.persistent_map_checkpoint)
    else:
        agent.tmax_mgr.initialize([obs], [info], 1)
        loaded_persistent_map = agent.tmax_mgr.dense_persistent_maps[-1]

    m = loaded_persistent_map

    t = Timing()

    log.info('Num landmarks: %d', m.num_landmarks())
    # Hard-coded landmark index used as the navigation goal for this demo.
    final_goal_idx = 49

    log.info('Locomotion goal is %d', final_goal_idx)

    # localizer = Localizer(m, agent)

    # Visualization: one window for the immediate sub-goal, one for the final goal.
    final_goal_obs = m.get_observation(final_goal_idx)
    cv2.namedWindow('next_target')
    cv2.moveWindow('next_target', 800, 100)
    cv2.namedWindow('final_goal')
    cv2.moveWindow('final_goal', 1400, 100)
    display_obs('next_target', obs)
    display_obs('final_goal', final_goal_obs)
    cv2.waitKey(1)

    # localizer.current_landmark = 0
    # next_target = localizer.get_next_target(obs, final_goal_idx)
    # next_target_obs = m.get_observation(next_target)

    frame = 0

    if params.naive_locomotion:
        navigator = NavigatorNaive(agent)
    else:
        navigator = Navigator(agent)

    navigator.reset(0, m)

    # Navigator API is batched (lists of maps/obs/goals); we use batch size 1.
    next_target, next_target_d = navigator.get_next_target(
        [m],
        [obs],
        [final_goal_idx],
        [frame],
    )
    next_target, next_target_d = next_target[0], next_target_d[0]
    next_target_obs = m.get_observation(next_target)

    while not done and not terminate:
        with t.timeit('one_frame'):
            env.render()
            if not pause:
                # Randomly alternate deterministic/stochastic action selection
                # (50/50) to avoid getting stuck on a deterministic loop.
                if random.random() < 0.5:
                    deterministic = False
                else:
                    deterministic = True

                if params.naive_locomotion:
                    action = navigator.replay_action([0])[0]
                else:
                    action = agent.locomotion.navigate(
                        agent.session,
                        [obs_prev],
                        [obs],
                        [next_target_obs],
                        deterministic=deterministic,
                    )[0]

                env_obs, rew, done, info = env.step(action)

                log.info('Action is %d', action)
                obs_prev = obs
                obs = main_observation(env_obs)

                # Re-localize and fetch the next sub-goal after every step.
                next_target, next_target_d = navigator.get_next_target(
                    [m],
                    [obs],
                    [final_goal_idx],
                    [frame],
                )
                next_target, next_target_d = next_target[0], next_target_d[0]
                if next_target is None:
                    log.error('We are lost!')
                else:
                    log.info('Next target is %d with distance %.3f!',
                             next_target, next_target_d)
                    # NOTE(review): this displays the *previous* target obs;
                    # the refreshed one is fetched just below -- confirm intended.
                    display_obs('next_target', next_target_obs)
                    cv2.waitKey(1)

                if next_target is not None:
                    next_target_obs = m.get_observation(next_target)

                log.info('Frame %d...', frame)

        # Throttle the loop to ~10 FPS based on measured frame time.
        took_seconds = t.one_frame
        desired_fps = 10
        wait_seconds = (1.0 / desired_fps) - took_seconds
        wait_seconds = max(0.0, wait_seconds)
        if wait_seconds > EPS:
            time.sleep(wait_seconds)

        # Frame counter advances only while not paused.
        if not pause:
            frame += 1

    log.info('After loop')

    env.render()
    time.sleep(0.05)

    env.close()
    agent.finalize()
    return 0
def main():
    """Script entry point: plot reward/entropy curves for curiosity experiments.

    Reads TensorFlow event files for each experiment, extracts the scalar
    summaries named in ``plots``, and saves one smoothed comparison figure
    per plot under the experiments directory.
    """
    stop_at = 80 * 1000 * 1000  # ignore summaries past this env step
    prefix = 'simple'

    # Alternate experiment sets, kept around for quick switching.
    # noinspection PyUnusedLocal
    experiments_very_sparse = [
        Experiment('doom_curious_vs_vanilla/doom_maze_very_sparse/doom_maze_very_sparse_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_sweep_very_sparse/doom_sweep_i_0.5_p_0.05', 'A2C+ICM (curious)'),
    ]

    # noinspection PyUnusedLocal
    experiments_sparse = [
        Experiment('doom_curious_vs_vanilla/doom_maze_sparse/doom_maze_sparse_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_curious_vs_vanilla/doom_maze_sparse/doom_maze_sparse_pre_0.05', 'A2C+ICM (curious)'),
    ]

    # noinspection PyUnusedLocal
    experiments_basic = [
        Experiment('doom_curious_vs_vanilla/doom_maze/doom_maze_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_curious_vs_vanilla/doom_maze/doom_maze_pre_0.05', 'A2C+ICM (curious)'),
    ]

    experiments = [
        Experiment('doom_curious_vs_vanilla/doom_basic/doom_basic_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_curious_vs_vanilla/doom_basic/doom_basic_pre_0.05', 'A2C+ICM (curious)'),
    ]

    plots = [
        Plot('a2c_aux_summary/avg_reward', 'average reward', 'Avg. reward for the last 1000 episodes'),
        Plot(
            'a2c_agent_summary/policy_entropy',
            'policy entropy, nats',
            'Stochastic policy entropy',
        ),
    ]

    for plot in plots:
        fig = plt.figure(figsize=(5, 4))
        fig.add_subplot()

        for idx, experiment in enumerate(experiments):
            # Last path component is the experiment name, the rest is its root.
            *root_parts, experiment_name = experiment.name.split(os.sep)
            exp_dir = experiment_dir(experiment_name, join(*root_parts))

            # Collect all event files written for this experiment.
            path_to_events_dir = summaries_dir(exp_dir)
            events_files = [
                join(path_to_events_dir, fname)
                for fname in os.listdir(path_to_events_dir)
                if fname.startswith('events')
            ]

            if not events_files:
                log.error('No events file for %s', experiment)
                continue

            steps, values = [], []

            # Read the scalar values for this plot from every event file,
            # oldest file first.
            for events_file in sorted(events_files):
                try:
                    for event in tf.train.summary_iterator(events_file):
                        if event.step >= stop_at:
                            continue
                        for v in event.summary.value:
                            if v.tag == plot.name:
                                steps.append(event.step)
                                values.append(v.simple_value)
                except Exception as exc:
                    # A truncated/corrupt event file ends this file's scan;
                    # keep whatever was read so far and move on.
                    log.warning(str(exc))

            # just in case: summaries may contain NaNs
            values = np.nan_to_num(values)

            # Overlay the raw curve (faint) with a running-mean smoothed curve.
            smooth = 10
            values_smooth = running_mean(values, smooth)
            steps = steps[smooth:]
            values = values[smooth:]

            plt.plot(steps, values, color=COLORS[idx], alpha=0.2, label='__nolegend__')
            plt.plot(steps, values_smooth, color=COLORS[idx], label=experiment.descr, linewidth=2)

        plt.xlabel('environment steps')
        plt.ylabel(plot.axis)
        plt.title(plot.descr)
        plt.grid(True)
        plt.legend()
        plt.tight_layout()

        # Save the figure under <experiments_dir>/plots/<prefix>_<plot>.png.
        plots_dir = ensure_dir_exists(join(experiments_dir(), 'plots'))
        plot_name = plot.name.replace('/', '_')
        plt.savefig(join(plots_dir, f'{prefix}_{plot_name}.png'))
        plt.close()

    return 0
Example #22
0
def get_config():
    """Log in to the Ceph/Calamari REST API and gather cluster configuration.

    Builds the machine lists (admin/mon/osd), authenticates against the API,
    and reads the cluster id (fsid).

    :return: dict with keys ``auth`` (authenticated client), ``fsid``,
             ``admin_node``, ``monsL``, ``osdL``; on any failure a dict with
             only ``{'auth': None}``.
    """
    # Example endpoint: http://10.8.128.28/api/v2/ --user admin --pass admin123
    username = '******'
    password = '******'
    uri = 'https://10.8.128.63:8002/api/v2/'

    # Discover the machines participating in the cluster.
    make_machines = MakeMachines()
    admin_node = make_machines.admin()
    monsL = make_machines.mon()
    osdL = make_machines.osd()

    try:
        # Login. The test rig uses a self-signed certificate, hence
        # verify=False on every request below.
        c = AuthenticatedHttpClient(uri, username, password)
        c.login()

        # Sanity-check the base API endpoint.
        response = c.request('GET', '', verify=False)
        response.raise_for_status()
        log.debug(json.dumps(response.json(), indent=2))

        # Cluster listing; the rig has a single cluster, take the first entry.
        cluster_response = c.request('GET', 'cluster', verify=False)
        cluster_response.raise_for_status()
        pretty_cluster_details = json.dumps(cluster_response.json(), indent=2)
        pretty_cluster_json = json.loads(pretty_cluster_details)[0]

        # User listing (logged for debugging only).
        log.info('api users list')
        log.info('--------------------')
        user_response = c.request('GET', 'user', verify=False)
        user_response.raise_for_status()
        log.debug(user_response.json())

        # Cluster details addressed by fsid.
        cluster_with_fsid_api = 'cluster' + '/' + pretty_cluster_json['id']
        log.debug('cluster with fsid %s:' % cluster_with_fsid_api)

        cluster_id_response = c.request('GET', cluster_with_fsid_api, verify=False)
        cluster_id_response.raise_for_status()
        log.debug('pretty cluster_id json data %s' % cluster_id_response.json())

        config_data = {
            'auth': c,
            'fsid': pretty_cluster_json['id'],
            'admin_node': admin_node,
            'monsL': monsL,
            'osdL': osdL,
        }

    # Bug fix: "except Exception, e" is Python 2 syntax and a SyntaxError in
    # Python 3 (this file already uses f-strings elsewhere).
    except Exception as e:
        log.error('error in auth')
        log.error(e)

        config_data = {'auth': None}

    # Bug fix: the original built config_data but never returned it,
    # so callers of get_config() always received None.
    return config_data
Example #23
0
    def init_subset(self, indices, actor_queues):
        """
        Initialize a subset of actor workers (rollout workers) and wait until the first reset() is completed for all
        envs on these workers.

        This function will retry if the worker process crashes during the initial reset.

        :param indices: indices of actor workers to initialize
        :param actor_queues: task queues corresponding to these workers
        :return: initialized workers
        """

        reset_timelimit_seconds = self.cfg.reset_timeout_seconds  # fail worker if not a single env was reset in that time

        # Spawn every worker in the subset and immediately request the first
        # reset; record a timestamp per worker for the watchdog below.
        workers = dict()
        last_env_initialized = dict()
        for i in indices:
            w = self.create_actor_worker(i, actor_queues[i])
            w.init()
            w.request_reset()
            workers[i] = w
            last_env_initialized[i] = time.time()

        total_num_envs = self.cfg.num_workers * self.cfg.num_envs_per_worker
        # Per-worker count of envs that finished reset (indexed by global worker idx).
        envs_initialized = [0] * self.cfg.num_workers
        workers_finished = set()

        # Event loop: consume progress reports until every worker in this
        # subset reports that its initial reset finished.
        while len(workers_finished) < len(workers):
            failed_worker = -1

            try:
                report = self.report_queue.get(timeout=1.0)

                if 'initialized_env' in report:
                    # One more env reset completed on this worker;
                    # refresh its watchdog timestamp.
                    worker_idx, split_idx, env_i = report['initialized_env']
                    last_env_initialized[worker_idx] = time.time()
                    envs_initialized[worker_idx] += 1

                    log.debug(
                        'Progress for %d workers: %d/%d envs initialized...',
                        len(indices),
                        sum(envs_initialized),
                        total_num_envs,
                    )
                elif 'finished_reset' in report:
                    workers_finished.add(report['finished_reset'])
                elif 'critical_error' in report:
                    failed_worker = report['critical_error']
            except Empty:
                # No report within the 1s timeout -- still run the watchdog below.
                pass

            # Watchdog: restart any unfinished worker that timed out, reported
            # a critical error, or whose process died.
            for worker_idx, w in workers.items():
                if worker_idx in workers_finished:
                    continue

                time_passed = time.time() - last_env_initialized[worker_idx]
                timeout = time_passed > reset_timelimit_seconds

                if timeout or failed_worker == worker_idx or not w.process.is_alive(
                ):
                    # Restart from scratch: this worker's env count goes back to zero.
                    envs_initialized[worker_idx] = 0

                    log.error('Worker %d is stuck or failed (%.3f). Reset!',
                              w.worker_idx, time_passed)
                    log.debug('Status: %r', w.process.is_alive())
                    stuck_worker = w
                    stuck_worker.process.kill()

                    # Replace the dead worker with a fresh one on the same index
                    # and queue. Overwriting an existing dict key while iterating
                    # items() is safe (the dict size does not change).
                    new_worker = self.create_actor_worker(
                        worker_idx, actor_queues[worker_idx])
                    new_worker.init()
                    new_worker.request_reset()

                    last_env_initialized[worker_idx] = time.time()
                    workers[worker_idx] = new_worker
                    del stuck_worker

        return workers.values()
    def sample(self, proc_idx):
        """Worker entry point: step one DMLab env with random actions to populate the level cache.

        Each worker owns exactly one env (asserted below). The worker keeps
        resetting the env to force level generation, reports frame counts to the
        parent via ``self.report_queue``, and throttles itself once it has
        generated its share of levels (or if its env does not use the cache).

        :param proc_idx: index of this worker process; also determines the env seed
        """
        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        timing = Timing()

        # Deprioritize: cache generation is background work.
        psutil.Process().nice(10)

        num_envs = len(DMLAB30_LEVELS_THAT_USE_LEVEL_CACHE)
        assert self.cfg.num_workers % num_envs == 0, f'should have an integer number of workers per env, e.g. {1 * num_envs}, {2 * num_envs}, etc...'
        assert self.cfg.num_envs_per_worker == 1, 'use populate_cache with 1 env per worker'

        with timing.timeit('env_init'):
            env_key = 'env'
            env_desired_num_levels = 0
            # Bug fix: previously this name was assigned only inside the
            # level_name branch below, causing a NameError in the sampling loop
            # for envs without a level name.
            env_num_levels_generated = 0

            global_env_id = proc_idx * self.cfg.num_envs_per_worker
            env_config = AttrDict(worker_index=proc_idx, vector_index=0, env_id=global_env_id)
            env = create_env(self.cfg.env, cfg=self.cfg, env_config=env_config)
            env.seed(global_env_id)

            # this is to track the performance for individual DMLab levels
            if hasattr(env.unwrapped, 'level_name'):
                env_key = env.unwrapped.level_name
                env_level = env.unwrapped.level

                # This worker's share of the levels needed for the full training run.
                approx_num_episodes_per_1b_frames = DMLAB30_APPROX_NUM_EPISODES_PER_BILLION_FRAMES[
                    env_key]
                num_billions = DESIRED_TRAINING_LENGTH / int(1e9)
                num_workers_for_env = self.cfg.num_workers // num_envs
                env_desired_num_levels = int(
                    (approx_num_episodes_per_1b_frames * num_billions) / num_workers_for_env)

                env_num_levels_generated = len(dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[0].
                                               all_seeds[env_level]) // num_workers_for_env

                log.warning('Worker %d (env %s) generated %d/%d levels!',
                            proc_idx,
                            env_key,
                            env_num_levels_generated,
                            env_desired_num_levels)
                time.sleep(4)

            env.reset()
            env_uses_level_cache = env.unwrapped.env_uses_level_cache

            self.report_queue.put(dict(proc_idx=proc_idx, finished_reset=True))

        # Wait for the parent to start all workers simultaneously.
        self.start_event.wait()

        try:
            with timing.timeit('work'):
                last_report = last_report_frames = total_env_frames = 0
                while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                    action = env.action_space.sample()
                    with timing.add_time(f'{env_key}.step'):
                        env.step(action)

                    total_env_frames += 1

                    # Reset after every single step: each reset generates a new level.
                    with timing.add_time(f'{env_key}.reset'):
                        env.reset()
                        env_num_levels_generated += 1
                        log.debug('Env %s done %d/%d resets',
                                  env_key,
                                  env_num_levels_generated,
                                  env_desired_num_levels)

                    if env_num_levels_generated >= env_desired_num_levels:
                        log.debug('%s finished %d/%d resets, sleeping...',
                                  env_key,
                                  env_num_levels_generated,
                                  env_desired_num_levels)
                        time.sleep(30)  # free up CPU time for other envs

                    # if env does not use level cache, there is no need to run it
                    # let other workers proceed
                    if not env_uses_level_cache:
                        log.debug('Env %s does not require cache, sleeping...', env_key)
                        time.sleep(200)

                    with timing.add_time('report'):
                        now = time.time()
                        if now - last_report > self.report_every_sec:
                            last_report = now
                            frames_since_last_report = total_env_frames - last_report_frames
                            last_report_frames = total_env_frames
                            self.report_queue.put(
                                dict(proc_idx=proc_idx, env_frames=frames_since_last_report))

                            # Stop generating if the disk is nearly full (< 3 GB free).
                            if get_free_disk_space_mb(self.cfg) < 3 * 1024:
                                log.error('Not enough disk space! %d',
                                          get_free_disk_space_mb(self.cfg))
                                time.sleep(200)
        # Narrowed from a bare "except:" which also swallowed SystemExit;
        # SIGINT is already ignored above, so Exception covers real failures.
        except Exception:
            log.exception('Unknown exception')
            log.error('Unknown exception in worker %d, terminating...', proc_idx)
            self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

        # Stagger shutdown so workers don't log/close all at once.
        time.sleep(proc_idx * 0.1 + 0.1)
        log.info('Process %d finished sampling. Timing: %s', proc_idx, timing)

        env.close()
Example #25
0
    def generate_bonus_rewards(self,
                               session,
                               obs,
                               next_obs,
                               actions,
                               dones,
                               infos,
                               mask=None):
        """Compute per-env intrinsic (curiosity) bonuses for one step of a vectorized rollout.

        Maintains one episodic topological map per env; localizes next_obs
        against it, rewarding map expansion and penalizing revisits, optionally
        adding a dense distance-based reward. Also adapts the landmark
        thresholds based on the landmark-generation rate.

        :param session: TF session passed through to the localizer
        :param obs: batch of current observations (only obs[0] is used, to seed the map)
        :param next_obs: batch of next observations, one per env
        :param actions: unused here; kept for interface compatibility
        :param dones: per-env episode-termination flags
        :param infos: per-env info dicts from the envs
        :param mask: optional per-env mask; envs with a falsy mask entry are skipped
        :return: numpy array of per-env bonus rewards
        """
        # Lazily create the shared explored-region map from the first observation.
        if self.explored_region_map is None:
            self.explored_region_map = TopologicalMap(obs[0],
                                                      directed_graph=False,
                                                      initial_info=infos[0])

        for i, episodic_map in enumerate(self.episodic_maps):
            if episodic_map is None:
                # noinspection PyTypeChecker
                self.episodic_maps[i] = copy.deepcopy(self.explored_region_map)

        for i in range(self.params.num_envs):
            if dones[i]:
                if self.params.expand_explored_region:
                    # save last n maps for later use
                    self.past_maps.append(copy.deepcopy(self.episodic_maps[i]))

                self.episode_bonuses.append(self.current_episode_bonus[i])
                self.current_episode_bonus[i] = 0

                if self.explored_region_map is not None:
                    # set the episodic map to be the map of the explored region, so we don't receive any more reward
                    # for seeing what we've already explored
                    # noinspection PyTypeChecker
                    self.episodic_maps[i] = copy.deepcopy(
                        self.explored_region_map)
                    # added_at == -1 marks pre-existing landmarks (never penalized as revisits).
                    for node in self.episodic_maps[i].graph.nodes:
                        self.episodic_maps[i].graph.nodes[node][
                            'added_at'] = -1
                else:
                    # we don't have a map of explored region, so reset episodic memory to zero
                    self.episodic_maps[i].reset(next_obs[i], infos[i])

                self.episodic_maps[i].new_episode()

                self.episode_frames[i] = 0
            else:
                self.episode_frames[i] += 1
                self.frames_analyzed += 1

        frames = self.episode_frames
        # Every env gets the constant per-step reward as a baseline.
        bonuses = np.full(self.params.num_envs,
                          fill_value=self.params.per_step_intrinsic_reward)
        with_sparse_reward = self.params.ecr_map_sparse_reward

        if self.initialized:
            # noinspection PyUnusedLocal
            def on_new_landmark(env_i_, new_landmark_idx):
                # Sparse reward: bonus whenever localization adds a landmark to the map.
                if with_sparse_reward:
                    bonuses[env_i_] += self.params.map_expansion_reward
                    self.landmarks_generated += 1

            if mask is None:
                maps = self.episodic_maps
            else:
                maps = [
                    self.episodic_maps[i] if mask[i] else None
                    for i in range(len(mask))
                ]

            self.localizer.new_landmark_threshold = self.new_landmark_threshold
            self.localizer.loop_closure_threshold = self.loop_closure_threshold
            distances_to_memory = self.localizer.localize(
                session,
                next_obs,
                infos,
                maps,
                self.distance,
                frames=frames,
                on_new_landmark=on_new_landmark,
            )

            if frames is not None:
                # Penalize revisiting landmarks added earlier in this episode.
                for env_i, m in enumerate(maps):
                    if m is None:
                        continue

                    if distances_to_memory[
                            env_i] < self.params.revisiting_threshold:
                        added_at = m.graph.nodes[m.curr_landmark_idx].get(
                            'added_at', -1)
                        if added_at == -1:
                            # Landmark pre-dates this episode -- no revisit penalty.
                            continue

                        if frames[
                                env_i] - added_at > self.params.revisit_num_frames:
                            bonuses[env_i] += self.params.revisiting_penalty

            # if bonuses[0] > 0:
            #     log.warning('Distances to memory: %.3f, bonuses: %.3f', distances_to_memory[0], bonuses[0])
            # else:
            #     log.info('Distances to memory: %.3f, bonuses: %.3f', distances_to_memory[0], bonuses[0])

            assert len(distances_to_memory) == len(next_obs)
            # Dense reward: positive when far from everything in memory, zero on done.
            threshold = 1.0
            dense_rewards = np.array([
                0.0 if done else dist - threshold
                for (dist, done) in zip(distances_to_memory, dones)
            ])
            dense_rewards *= 0.05  # scaling factor

            if self.params.ecr_map_dense_reward:
                for i in range(self.params.num_envs):
                    if maps[i] is not None:
                        bonuses[i] += dense_rewards[i]

            # Bug fix: the original check was "math.nan in bonuses", which can
            # never be true (NaN != NaN, and numpy membership uses ==), so NaNs
            # were silently passed through.
            if np.isnan(bonuses).any():
                log.error('Bonuses: %r', bonuses)
                log.error('NaN values in bonus array!')

        self.current_episode_bonus += bonuses

        if self.params.ecr_map_adaptive_reward:
            # Every ~50k frames, tune thresholds so landmarks are generated at a
            # rate between 25 and 40 per 1000 frames.
            if self.frames_analyzed >= 50000:
                ratio = self.landmarks_generated / self.frames_analyzed
                if ratio < 25 / 1000:
                    # make landmarks easier to find
                    self.new_landmark_threshold *= 0.95
                    self.loop_closure_threshold = 0.5 * self.new_landmark_threshold
                    log.info(
                        'Decreased landmark threshold to %.3f (%.3f)',
                        self.new_landmark_threshold,
                        self.loop_closure_threshold,
                    )
                elif ratio > 40 / 1000:
                    not_far_probability = 1.0 - self.new_landmark_threshold
                    not_far_probability *= 0.9  # decrease minimum probability that new landmark is not "far"
                    self.new_landmark_threshold = 1.0 - not_far_probability
                    self.loop_closure_threshold = 0.5 * self.new_landmark_threshold
                    log.info(
                        'Increased landmark threshold to %.3f (%.3f)',
                        self.new_landmark_threshold,
                        self.loop_closure_threshold,
                    )
                else:
                    log.info('Landmark threshold unchanged, ratio %.3f', ratio)

                self.frames_analyzed = 0
                self.landmarks_generated = 0

        return bonuses