Beispiel #1
0
        def setup_teardown(self):
            """
            Call code to set up and tear down tests.

            Run this only once because we'll be running all Fast ACUTE code before
            checking any results.

            Yields a dict with two keys:
            - 'state': the value returned by `self._get_agent_state()` for
              `self.TASK_DATA`
            - 'results_folder': the runner's `results_path` after analysis has run

            The temp directory created here is removed even if setup, the Fast ACUTE
            run, or `self._teardown()` raises.
            """

            self._setup()

            # Set up common temp directory
            root_dir = tempfile.mkdtemp()

            # Guard everything that happens after the temp directory exists so that it
            # cannot be leaked by a failure before or during teardown
            try:
                # Define output structure
                outputs = {}

                # Set up config
                test_overrides = [
                    f'+mephisto.blueprint.config_path={self.TASK_DIRECTORY}/task_config/model_config_dataset.json',
                    f'+mephisto.blueprint.models=\"{self.MODEL_STRING}\"',
                    '+mephisto.blueprint.model_pairs=""',
                    '+mephisto.blueprint.num_task_data_episodes=500',
                    '+mephisto.blueprint.selfchat_max_turns=6',
                ]
                # TODO: clean this up when Hydra has support for recursive defaults
                self._set_up_config(
                    blueprint_type=FAST_ACUTE_BLUEPRINT_TYPE,
                    task_directory=self.TASK_DIRECTORY,
                    overrides=self._get_common_overrides(root_dir) + test_overrides,
                )
                # TODO: hack to manually set mephisto.blueprint.model_pairs to None.
                #  Remove when Hydra releases support for recursive defaults
                self.config.mephisto.blueprint.model_pairs = None

                # Run Fast ACUTEs
                runner = FastAcuteExecutor(self.config)
                runner.compile_chat_logs()
                runner.set_up_acute_eval()
                # The server is set up with the args produced by the runner, not the
                # blueprint config built above
                self.config.mephisto.blueprint = runner.fast_acute_args
                self._set_up_server()
                outputs['state'] = self._get_agent_state(task_data=self.TASK_DATA)

                # Run analysis
                runner.analyze_results(args=f'--mephisto-root {self.database_path}')
                outputs['results_folder'] = runner.results_path

                yield outputs
                # All code after this will be run upon teardown

                self._teardown()
            finally:
                # Tear down temp file
                shutil.rmtree(root_dir)
        def setup_teardown(self):
            """
            Call code to set up and tear down tests.

            Run this only once because we'll be running all Fast ACUTE code before
            checking any results.

            The self-chat files under `<TASK_DIRECTORY>/task_config/self_chats` are
            copied into a fresh temp directory, and the config is set up with
            `use_existing_self_chat_files=True`.

            Yields a dict with two keys:
            - 'state': the value returned by `self._get_agent_state()` for
              `self.TASK_DATA`
            - 'results_folder': the runner's `results_path` after analysis has run

            The temp directory created here is removed even if setup, the Fast ACUTE
            run, or `self._teardown()` raises.
            """

            self._setup()

            # Set up common temp directory
            root_dir = tempfile.mkdtemp()

            # Guard everything that happens after the temp directory exists so that it
            # (and the self-chat files copied into it) cannot be leaked by a failure
            try:
                # Copy over expected self-chat files
                shutil.copytree(
                    os.path.join(self.TASK_DIRECTORY, 'task_config', 'self_chats'),
                    os.path.join(root_dir, 'self_chats'),
                )

                # Define output structure
                outputs = {}

                # Set up config
                test_overrides = ['mephisto.blueprint.use_existing_self_chat_files=True']
                self._set_up_config(
                    task_directory=self.TASK_DIRECTORY,
                    overrides=self._get_common_overrides(root_dir) + test_overrides,
                    config_name=FAST_ACUTE_CONFIG_NAME,
                )

                # Run Fast ACUTEs
                runner = FastAcuteExecutor(self.config)
                runner.compile_chat_logs()
                runner.set_up_acute_eval()
                # The server is set up with the args produced by the runner, not the
                # blueprint config built above
                self.config.mephisto.blueprint = runner.fast_acute_args
                self._set_up_server()
                outputs['state'] = self._get_agent_state(task_data=self.TASK_DATA)

                # Run analysis
                runner.analyze_results(args=f'--mephisto-root {self.database_path}')
                outputs['results_folder'] = runner.results_path

                yield outputs
                # All code after this will be run upon teardown

                self._teardown()
            finally:
                # Tear down temp file
                shutil.rmtree(root_dir)
Beispiel #3
0
        def setup_teardown(self):
            """
            Call code to set up and tear down tests.

            Run this only once because we'll be running all Fast ACUTE code before
            checking any results.

            The self-chat files under `<TASK_DIRECTORY>/task_config/self_chats` are
            copied into a fresh temp directory, and a JSON config file is written there
            that maps each entry of `self.MODELS` to its self-chat log path. Exactly
            two models are expected, since they are passed as a single model pair.

            Yields a dict with two keys:
            - 'state': the value returned by `self._get_agent_state()` for
              `self.TASK_DATA`
            - 'results_folder': the runner's `results_path` after analysis has run

            The temp directory created here is removed even if setup, the Fast ACUTE
            run, or `self._teardown()` raises.
            """

            self._setup()

            # Set up common temp directory
            root_dir = tempfile.mkdtemp()

            # Guard everything that happens after the temp directory exists so that it
            # (and the files written into it) cannot be leaked by a failure
            try:
                # Params
                config_path = os.path.join(root_dir, 'config.json')

                # Copy over expected self-chat files
                shutil.copytree(
                    os.path.join(self.TASK_DIRECTORY, 'task_config', 'self_chats'),
                    os.path.join(root_dir, 'self_chats'),
                )

                # Define output structure
                outputs = {}

                # Run Fast ACUTEs and analysis on the base task

                # Set up config
                assert len(self.MODELS) == 2
                test_overrides = [
                    f'+mephisto.blueprint.config_path={config_path}',
                    '+mephisto.blueprint.models=""',
                    f'+mephisto.blueprint.model_pairs={self.MODELS[0]}:{self.MODELS[1]}',
                ]
                # TODO: clean this up when Hydra has support for recursive defaults
                self._set_up_config(
                    blueprint_type=FAST_ACUTE_BLUEPRINT_TYPE,
                    task_directory=self.TASK_DIRECTORY,
                    overrides=self._get_common_overrides(root_dir) + test_overrides,
                )
                # TODO: hack to manually set mephisto.blueprint.models to None. Remove
                #  when Hydra releases support for recursive defaults
                self.config.mephisto.blueprint.models = None

                # Save the config file: one entry per model, pointing at the self-chat
                # log path reported by FastAcuteExecutor for this root dir and task
                blueprint = self.config.mephisto.blueprint
                config = {
                    model: {
                        'log_path': FastAcuteExecutor.get_relative_selfchat_log_path(
                            root_dir=blueprint.root_dir,
                            model=model,
                            task=blueprint.task,
                        ),
                        'is_selfchat': True,
                    }
                    for model in self.MODELS
                }
                with open(config_path, 'w') as f:
                    json.dump(config, f)

                # Run Fast ACUTEs
                runner = FastAcuteExecutor(self.config)
                runner.compile_chat_logs()
                runner.set_up_acute_eval()
                # The server is set up with the args produced by the runner, not the
                # blueprint config built above
                self.config.mephisto.blueprint = runner.fast_acute_args
                self._set_up_server()
                outputs['state'] = self._get_agent_state(task_data=self.TASK_DATA)

                # Run analysis
                runner.analyze_results(args=f'--mephisto-root {self.database_path}')
                outputs['results_folder'] = runner.results_path

                yield outputs
                # All code after this will be run upon teardown

                self._teardown()
            finally:
                # Tear down temp file
                shutil.rmtree(root_dir)