예제 #1
0
def check():
    """
    Report whether Mephisto appears to be set up correctly.

    Opens a ``LocalMephistoDB`` and requests the mock requester from it. If
    either step raises, a failure notice followed by the exception is echoed
    and nothing propagates; otherwise a success message is echoed.
    """
    from mephisto.core.local_database import LocalMephistoDB
    from mephisto.core.utils import get_mock_requester

    try:
        get_mock_requester(LocalMephistoDB())
    except Exception as error:
        # Report the failure to the user rather than crashing the command.
        click.echo("Something went wrong.")
        click.echo(error)
    else:
        click.echo("Mephisto seems to be set up correctly.")
예제 #2
0
    def parse_launch_arguments(
            self, args=None) -> Tuple[str, str, "MephistoDB", Dict[str, Any]]:
        """
        Parse the common launch arguments out from the command line.

        Args:
            args: Optional argument list forwarded to ``self.parse_known_args``.
                Arguments the parser does not recognize are discarded.

        Returns:
            A tuple of the architect type, the requester name to use, the
            MephistoDB to run with, and the dict of all parsed arguments.

        Raises:
            SystemExit: With exit code 1 when no requester is registered for
                the requested provider type, or when the explicitly named
                requester cannot be found in the database.

        When neither a requester nor a provider type is given, the mock
        provider and its mock requester are used. When no architect is given,
        `heroku` is chosen for the `mturk` and `mturk_sandbox` providers and
        `local` for every other provider (including mock). Launching on the
        live `mturk` provider blocks on a console prompt first.
        """
        parsed_args, _unknown = self.parse_known_args(args=args)
        arg_dict = vars(parsed_args)
        requester_name = arg_dict["requester_name"]
        provider_type = arg_dict["provider_type"]
        architect_type = arg_dict["architect_type"]
        datapath = arg_dict["datapath"]

        if datapath is None:
            datapath = get_root_data_dir()

        database_path = os.path.join(datapath, "database.db")
        db = LocalMephistoDB(database_path=database_path)

        if requester_name is None:
            if provider_type is None:
                print("No requester specified, defaulting to mock")
                provider_type = "mock"
            if provider_type == "mock":
                req = get_mock_requester(db)
                requester_name = req.requester_name
            else:
                reqs = db.find_requesters(provider_type=provider_type)
                # TODO (#93) proper logging
                if len(reqs) == 0:
                    print(
                        f"No requesters found for provider type {provider_type}, please "
                        f"register one. You can register with `mephisto register {provider_type}`, "
                        f"or `python mephisto/client/cli.py register {provider_type}` if you haven't "
                        "installed Mephisto using poetry.")
                    # Raise SystemExit directly: the `exit` builtin is injected
                    # by the `site` module for interactive use and is not
                    # guaranteed to exist (e.g. `python -S`, frozen builds).
                    raise SystemExit(1)
                elif len(reqs) == 1:
                    req = reqs[0]
                    requester_name = req.requester_name
                    print(
                        f"Found one `{provider_type}` requester to launch with: {requester_name}"
                    )
                else:
                    # Takes the last entry returned by the database; presumably
                    # results are ordered oldest-first -- TODO confirm.
                    req = reqs[-1]
                    requester_name = req.requester_name
                    print(
                        f"Found many `{provider_type}` requesters to launch with, "
                        f"choosing the most recent: {requester_name}")
        else:
            # Ensure provided requester exists
            reqs = db.find_requesters(requester_name=requester_name)
            if len(reqs) == 0:
                print(f"No requesters found under name {requester_name}, "
                      "have you registered with `mephisto register`?")
                raise SystemExit(1)
            # The stored requester decides the provider type, overriding any
            # provider type passed on the command line.
            provider_type = reqs[0].provider_type

        # provider type and requester name now set, ensure architect
        if architect_type is None:
            if provider_type in ("mturk_sandbox", "mturk"):
                architect_type = "heroku"
            else:
                architect_type = "local"

            # TODO (#93) proper logging
            print(f"No architect specified, defaulting to architect "
                  f"`{architect_type}` for provider `{provider_type}`")

        if provider_type in ["mturk"]:
            # Give the user a chance to abort before launching on live mturk.
            input(
                f"This task is going to launch live on {provider_type}, press enter to continue: "
            )

        return architect_type, requester_name, db, arg_dict
예제 #3
0
def augment_config_from_db(script_cfg: DictConfig,
                           db: "MephistoDB") -> DictConfig:
    """
    Check the database for validity of the incoming MephistoConfig, and
    ensure that the provider fields of the config are set.

    Args:
        script_cfg: Config whose ``mephisto.provider`` and
            ``mephisto.architect`` sections are inspected.
        db: Database used to look up (or create the mock) requester.

    Returns:
        The same ``script_cfg`` object, with
        ``mephisto.provider.requester_name`` and
        ``mephisto.provider._provider_type`` filled in.

    Raises:
        SystemExit: With exit code 1 when no requester is registered for the
            requested provider type, or when the explicitly named requester
            cannot be found in the database.

    When neither a requester nor a provider type is configured, the mock
    provider and its mock requester are used. The architect type is only read
    for the confirmation prompt; it is never defaulted or written back here.
    """
    cfg = script_cfg.mephisto
    requester_name = cfg.provider.get("requester_name", None)
    provider_type = cfg.provider.get("_provider_type", None)
    architect_type = cfg.architect.get("_architect_type", None)

    if requester_name is None:
        if provider_type is None:
            print("No requester specified, defaulting to mock")
            provider_type = "mock"
        if provider_type == "mock":
            req = get_mock_requester(db)
            requester_name = req.requester_name
        else:
            reqs = db.find_requesters(provider_type=provider_type)
            # TODO (#93) proper logging
            if len(reqs) == 0:
                print(
                    f"No requesters found for provider type {provider_type}, please "
                    f"register one. You can register with `mephisto register {provider_type}`, "
                    f"or `python mephisto/client/cli.py register {provider_type}` if you haven't "
                    "installed Mephisto using poetry.")
                # Raise SystemExit directly: the `exit` builtin is injected by
                # the `site` module for interactive use and is not guaranteed
                # to exist (e.g. `python -S`, frozen builds).
                raise SystemExit(1)
            elif len(reqs) == 1:
                req = reqs[0]
                requester_name = req.requester_name
                print(
                    f"Found one `{provider_type}` requester to launch with: {requester_name}"
                )
            else:
                # Takes the last entry returned by the database; presumably
                # results are ordered oldest-first -- TODO confirm.
                req = reqs[-1]
                requester_name = req.requester_name
                print(
                    f"Found many `{provider_type}` requesters to launch with, "
                    f"choosing the most recent: {requester_name}")
    else:
        # Ensure provided requester exists
        reqs = db.find_requesters(requester_name=requester_name)
        if len(reqs) == 0:
            print(f"No requesters found under name {requester_name}, "
                  "have you registered with `mephisto register`?")
            raise SystemExit(1)
        # The stored requester decides the provider type, overriding whatever
        # provider type the config carried.
        provider_type = reqs[0].provider_type

    if provider_type in ["mturk"]:
        # Give the user a chance to abort before launching on live mturk.
        input(
            f"This task is going to launch live on {provider_type}, press enter to continue: "
        )
    if provider_type in ["mturk_sandbox", "mturk"
                         ] and architect_type != "heroku":
        # An mturk provider paired with a non-heroku architect gets a second
        # confirmation prompt.
        input(
            f"This task is going to launch live on {provider_type}, but your "
            f"provided architect is {architect_type}, are you sure you "
            "want to do this? : ")

    cfg.provider.requester_name = requester_name
    cfg.provider._provider_type = provider_type
    return script_cfg
예제 #4
0
    def validate_and_run_config_or_die(
        self,
        run_config: DictConfig,
        shared_state: Optional[SharedTaskState] = None,
    ) -> str:
        """
        Validate the given run config and launch a job for it.

        Args:
            run_config: Config providing the ``provider``, ``blueprint``,
                ``architect`` and ``task`` sections read below.
            shared_state: State shared with the blueprint, architect and
                provider; a fresh ``SharedTaskState`` is created when omitted.

        Returns:
            The ``db_id`` of the newly created task run.

        Raises:
            EntryDoesNotExistException: If no requester exists under the
                configured name (and the name is not ``MOCK_REQUESTER``).
            AssertionError: If the found requester's provider type differs
                from ``run_config.provider._provider_type``.
            Exception: Any error raised while setting up the run is logged,
                the architect (if it was created) is shut down, and the
                original error is re-raised.
        """
        if shared_state is None:
            shared_state = SharedTaskState()

        # First try to find the requester:
        requester_name = run_config.provider.requester_name
        requesters = self.db.find_requesters(requester_name=requester_name)
        if len(requesters) == 0:
            if run_config.provider.requester_name == "MOCK_REQUESTER":
                requesters = [get_mock_requester(self.db)]
            else:
                raise EntryDoesNotExistException(
                    f"No requester found with name {requester_name}")
        requester = requesters[0]
        requester_id = requester.db_id
        provider_type = requester.provider_type
        assert provider_type == run_config.provider._provider_type, (
            f"Found requester for name {requester_name} is not "
            f"of the specified type {run_config.provider._provider_type}, "
            f"but is instead {provider_type}.")

        # Next get the abstraction classes, and run validation
        # before anything is actually created in the database
        blueprint_type = run_config.blueprint._blueprint_type
        architect_type = run_config.architect._architect_type
        BlueprintClass = get_blueprint_from_type(blueprint_type)
        ArchitectClass = get_architect_from_type(architect_type)
        CrowdProviderClass = get_crowd_provider_from_type(provider_type)

        BlueprintClass.assert_task_args(run_config, shared_state)
        ArchitectClass.assert_task_args(run_config, shared_state)
        CrowdProviderClass.assert_task_args(run_config, shared_state)

        # Find an existing task or create a new one
        task_name = run_config.task.get("task_name", None)
        if task_name is None:
            task_name = blueprint_type
            logger.warning(
                f"Task is using the default blueprint name {task_name} as a name, "
                "as no task_name is provided")
        tasks = self.db.find_tasks(task_name=task_name)
        if len(tasks) == 0:
            task_id = self.db.new_task(task_name, blueprint_type)
        else:
            task_id = tasks[0].db_id

        logger.info(f"Creating a task run under task name: {task_name}")

        # Create a new task run
        new_run_id = self.db.new_task_run(
            task_id,
            requester_id,
            json.dumps(OmegaConf.to_container(run_config, resolve=True)),
            provider_type,
            blueprint_type,
            requester.is_sandbox(),
        )
        task_run = TaskRun(self.db, new_run_id)

        # Bound before the try so the error handler can tell whether an
        # architect exists yet; a failure in makedirs or the architect
        # constructor would otherwise leave the name unbound in the handler.
        architect = None
        try:
            # If anything fails after here, we have to cleanup the architect

            build_dir = os.path.join(task_run.get_run_dir(), "build")
            os.makedirs(build_dir, exist_ok=True)
            architect = ArchitectClass(self.db, run_config, shared_state,
                                       task_run, build_dir)

            # Register the blueprint with args to the task run,
            # ensure cached
            blueprint = BlueprintClass(task_run, run_config, shared_state)
            task_run.get_blueprint(args=run_config, shared_state=shared_state)

            # Setup and deploy the server (the value returned by prepare()
            # is not needed here)
            architect.prepare()
            task_url = architect.deploy()

            # TODO(#102) maybe the cleanup (destruction of the server configuration?) should only
            # happen after everything has already been reviewed, this way it's possible to
            # retrieve the exact build directory to review a task for real
            architect.cleanup()

            # Create the backend runner
            task_runner = BlueprintClass.TaskRunnerClass(
                task_run, run_config, shared_state)

            # Small hack for auto appending block qualification
            existing_qualifications = shared_state.qualifications
            if run_config.blueprint.get("block_qualification",
                                        None) is not None:
                existing_qualifications.append(
                    make_qualification_dict(
                        run_config.blueprint.block_qualification,
                        QUAL_NOT_EXIST, None))
            if run_config.blueprint.get("onboarding_qualification",
                                        None) is not None:
                existing_qualifications.append(
                    make_qualification_dict(
                        OnboardingRequired.get_failed_qual(
                            run_config.blueprint.onboarding_qualification, ),
                        QUAL_NOT_EXIST,
                        None,
                    ))
            shared_state.qualifications = existing_qualifications

            # Register the task with the provider
            provider = CrowdProviderClass(self.db)
            provider.setup_resources_for_task_run(task_run, run_config,
                                                  task_url)

            initialization_data_array = blueprint.get_initialization_data()

            # Link the job together
            job = self.supervisor.register_job(architect, task_runner,
                                               provider,
                                               existing_qualifications)
            if self.supervisor.sending_thread is None:
                self.supervisor.launch_sending_thread()
        except (KeyboardInterrupt, Exception) as e:
            logger.error(
                "Encountered error while launching run, shutting down",
                exc_info=True)
            # Only attempt a shutdown when the architect was actually built.
            if architect is not None:
                try:
                    architect.shutdown()
                except (KeyboardInterrupt, Exception) as architect_exception:
                    # A failing shutdown is logged but must not mask the
                    # original launch error re-raised below.
                    logger.exception(
                        f"Could not shut down architect: {architect_exception}"
                    )
            raise e

        launcher = TaskLauncher(self.db, task_run, initialization_data_array)
        launcher.create_assignments()
        launcher.launch_units(task_url)

        # Keep handles to everything belonging to this run, keyed by run id.
        self._task_runs_tracked[task_run.db_id] = TrackedRun(
            task_run=task_run,
            task_launcher=launcher,
            task_runner=task_runner,
            architect=architect,
            job=job,
        )
        task_run.update_completion_progress(status=False)

        return task_run.db_id