def get_datastore_for_provider(self, provider_type: str) -> Any:
    """
    Look up the datastore stored for `provider_type` on this db.

    If no entry exists yet, the provider class for that type is resolved and
    instantiated with this db before the lookup. The lookup uses `.get`, so
    None is returned when there is still no entry afterwards.
    """
    already_known = provider_type in self.__provider_datastores
    if not already_known:
        # Per the original note, constructing the provider is what registers
        # it for use; the instance itself is not kept around.
        get_crowd_provider_from_type(provider_type)(self)
    return self.__provider_datastores.get(provider_type)
def __new__(cls, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None) -> "Agent":
    """
    Construct the provider-specific Agent subclass for the given db_id.

    Asking for the base Agent never yields a base Agent: the "provider_type"
    field of the agent's row selects a crowd provider, and that provider's
    AgentClass is instantiated instead. Asking for any other class builds
    that class directly.
    """
    from mephisto.core.registry import get_crowd_provider_from_type

    if cls != Agent:
        # A concrete class was requested explicitly, so build it as-is
        return super().__new__(cls)

    # Base Agent requested: use the supplied row, or fetch it from the db
    agent_row = db.get_agent(db_id) if row is None else row
    assert agent_row is not None, f"Given db_id {db_id} did not exist in given db"
    provider_type = agent_row["provider_type"]
    return super().__new__(get_crowd_provider_from_type(provider_type).AgentClass)
def requester_register(requester_type):
    """
    Register a requester of the given provider type from the JSON request body.

    The body is parsed against the provider's RequesterClass arguments. An
    existing requester with the parsed name is reused, otherwise a new one is
    created. The response is always JSON with a "success" flag, plus a "msg"
    describing the failure when "success" is False.
    """
    payload = request.get_json()
    RequesterClass = get_crowd_provider_from_type(requester_type).RequesterClass

    try:
        parsed_options = parse_arg_dict(RequesterClass, payload)
    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        failure = {
            "success": False,
            "msg": f"error in parsing arguments: {str(e)}",
        }
        return jsonify(failure)

    if "name" not in parsed_options:
        missing_name = {
            "success": False,
            "msg": "No name was specified for the requester.",
        }
        return jsonify(missing_name)

    db = app.extensions["db"]
    matches = db.find_requesters(requester_name=parsed_options["name"])
    if len(matches) != 0:
        requester = matches[0]
    else:
        requester = RequesterClass.new(db, parsed_options["name"])

    try:
        print(parsed_options)
        requester.register(parsed_options)
        return jsonify({"success": True})
    except Exception as e:
        return jsonify({"success": False, "msg": str(e)})
def delete_qualification(self, qualification_name: str) -> None:
    """
    Delete the named qualification, then have every valid crowd provider
    clean up its own state for that qualification.
    """
    self._delete_qualification(qualification_name)
    # map() is lazy, so each provider class is resolved right before it is used
    provider_classes = map(get_crowd_provider_from_type, get_valid_provider_types())
    for ProviderClass in provider_classes:
        ProviderClass(self).cleanup_qualification(qualification_name)
def get_provider(self) -> "CrowdProvider":
    """
    Return the crowd provider instance for this object's provider_type.

    The instance is created on first use from self.db and cached, so later
    calls return the same object.
    """
    from mephisto.core.registry import get_crowd_provider_from_type

    if self.__crowd_provider is not None:
        return self.__crowd_provider

    provider_class = get_crowd_provider_from_type(self.provider_type)
    self.__crowd_provider = provider_class(self.db)
    return self.__crowd_provider
def register_provider(args):
    """
    Register a requester with a crowd provider.

    `args` is the raw CLI argument list: the provider type first, followed by
    zero or more `key:value` requester options.

    - With no arguments at all, a usage line is printed.
    - With only a provider type, the arguments accepted by that provider's
      RequesterClass are printed as tables.
    - Otherwise the options are parsed and the requester with the parsed
      "name" is registered, creating it first if it does not exist yet.

    Every failure is reported through click.echo and ends the command; nothing
    is raised to the caller for parse errors, a missing name, or a failing
    register call.
    """
    if len(args) == 0:
        click.echo(
            "Usage: mephisto register <provider_type> --arg1:value --arg2:value"
        )
        return

    from mephisto.core.local_database import LocalMephistoDB
    from mephisto.core.registry import get_crowd_provider_from_type
    from mephisto.core.argparse_parser import parse_arg_dict, get_extra_argument_dicts

    provider_type, requester_args = args[0], args[1:]

    # Report arguments without a separator instead of crashing inside dict()
    malformed = [arg for arg in requester_args if ":" not in arg]
    if malformed:
        click.echo(f"Arguments must be given as --name:value, got: {malformed}")
        return

    # Split on the first colon only, so values may themselves contain colons
    args_dict = dict(arg.split(":", 1) for arg in requester_args)
    transformed = {
        key: {"option_string": key, "value": value}
        for key, value in args_dict.items()
    }

    crowd_provider = get_crowd_provider_from_type(provider_type)
    RequesterClass = crowd_provider.RequesterClass

    if len(requester_args) == 0:
        # No options given: describe what this requester type accepts
        from tabulate import tabulate

        params = get_extra_argument_dicts(RequesterClass)
        for param in params:
            click.echo(param["desc"])
            click.echo(tabulate(param["args"].values(), headers="keys"))
        return

    try:
        parsed_options = parse_arg_dict(RequesterClass, transformed)
    except Exception as e:
        click.echo(str(e))
        # parsed_options is unbound here, so there is nothing left to do
        return

    if "name" not in parsed_options:
        click.echo("No name was specified for the requester.")
        return

    db = LocalMephistoDB()
    requesters = db.find_requesters(requester_name=parsed_options["name"])
    if len(requesters) == 0:
        requester = RequesterClass.new(db, parsed_options["name"])
    else:
        requester = requesters[0]

    try:
        requester.register(parsed_options)
        click.echo("Registered successfully.")
    except Exception as e:
        click.echo(str(e))
def requester_launch_options(requester_type=None, requester_name=None):
    """
    Return the launch options of the crowd provider behind a named requester.

    The requester is looked up by name in the app's db. If none exists, the
    JSON response has "success": False and a "msg". Otherwise it has
    "success": True and "options" holding the argument descriptions of the
    requester's crowd provider class.

    NOTE(review): the original body read an undefined `requester_name` while
    the only parameter was `requester_type`. The route decorator is not
    visible here, so both keyword names are accepted and whichever is given
    is treated as the requester's name. Confirm against the route definition.
    """
    if requester_name is None:
        requester_name = requester_type

    db = app.extensions["db"]
    requesters = db.find_requesters(requester_name=requester_name)
    if len(requesters) == 0:
        return jsonify({
            "success": False,
            "msg": f"No requester available with name: {requester_name}",
        })

    # Resolve the provider from the stored requester, not from the URL value
    provider_type = requesters[0].provider_type
    CrowdProviderClass = get_crowd_provider_from_type(provider_type)
    params = get_extra_argument_dicts(CrowdProviderClass)
    return jsonify({"success": True, "options": params})
def __init__(self, task_run: "TaskRun"):
    """
    Load the parsed arguments for a task run, parsing and caching them if needed.

    If the config file already exists in the run directory it is read as
    JSON. Otherwise task_run.param_string is parsed with an argparse parser
    built from the blueprint, crowd provider and TaskConfig argument groups,
    and the result is written to that file. Selected values are then copied
    onto attributes of this instance.
    """
    self.db = task_run.db

    arg_path = os.path.join(task_run.get_run_dir(), CONFIG_FILE_PATH)
    if not os.path.exists(arg_path):
        # Nothing cached yet, so parse the run's parameter string
        BlueprintClass = get_blueprint_from_type(task_run.task_type)
        CrowdProviderClass = get_crowd_provider_from_type(task_run.provider_type)
        param_string = task_run.param_string

        parser = argparse.ArgumentParser()
        arg_sources = (
            ("blueprint", BlueprintClass),
            ("crowd_provider", CrowdProviderClass),
            ("task_config", TaskConfig),
        )
        for group_name, arg_source in arg_sources:
            arg_source.add_args_to_group(parser.add_argument_group(group_name))

        try:
            split_params = shlex.split(param_string)
            arg_namespace, _unknown = parser.parse_known_args(split_params)
        except SystemExit:
            raise Exception(f"Argparse broke on {param_string} - must fix")

        args = vars(arg_namespace)
        with open(arg_path, "w+") as config_file:
            json.dump(args, config_file)
    else:
        # Reuse the arguments parsed on an earlier construction
        with open(arg_path, "r") as config_file:
            args = json.load(config_file)

    # Expose the task_config specific values as attributes
    self.args: Dict[str, Any] = args
    self.task_title: str = args["task_title"]
    self.task_description: str = args["task_description"]
    self.task_reward: float = args["task_reward"]
    raw_tags = args["task_tags"].split(",")
    self.task_tags: List[str] = [tag.strip() for tag in raw_tags]
    self.assignment_duration_in_seconds: int = args["assignment_duration_in_seconds"]
    self.allowed_concurrent: int = args["allowed_concurrent"]
    self.maximum_units_per_worker: int = args["maximum_units_per_worker"]
def get_crowd_provider_class(self) -> Type["CrowdProvider"]:
    """Resolve this object's provider_type to its CrowdProvider class."""
    from mephisto.core.registry import get_crowd_provider_from_type

    provider_class = get_crowd_provider_from_type(self.provider_type)
    return provider_class
def parse_and_launch_run(
    self,
    arg_list: Optional[List[str]] = None,
    extra_args: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Parse the given arguments and launch a job.

    Steps, in order:
      1. Parse the baseline arguments and look up the requester by name.
      2. Resolve the blueprint, architect and crowd provider classes, parse
         the remaining arguments against them, merge in extra_args, and run
         each class's assert_task_args.
      3. Find the task by name or create it, then create a new task run.
      4. Build the architect, blueprint, task runner and provider, deploy the
         server, and register the job with the supervisor.
      5. Create a TaskLauncher, create assignments, launch units, and track
         the run in self._task_runs_tracked.

    arg_list is handed to argparse's parse_known_args as-is.
    extra_args, when given, is merged into the parsed task args via update().

    Returns the db_id of the newly created task run.

    Raises EntryDoesNotExistException when no requester has the given name.
    Any exception (or KeyboardInterrupt) raised during step 4 is logged, a
    shutdown of the architect is attempted, and the exception is re-raised.
    """
    if extra_args is None:
        extra_args = {}
    # Extract the abstractions being used
    parser = self._get_baseline_argparser()
    type_args, task_args_string = parser.parse_known_args(arg_list)

    requesters = self.db.find_requesters(requester_name=type_args.requester_name)
    if len(requesters) == 0:
        raise EntryDoesNotExistException(
            f"No requester found with name {type_args.requester_name}"
        )
    requester = requesters[0]
    requester_id = requester.db_id
    # The provider type comes from the stored requester, not from the arguments
    provider_type = requester.provider_type

    # Parse the arguments for the abstractions to ensure
    # everything required is set
    BlueprintClass = get_blueprint_from_type(type_args.blueprint_type)
    ArchitectClass = get_architect_from_type(type_args.architect_type)
    CrowdProviderClass = get_crowd_provider_from_type(provider_type)
    task_args, _unknown = self._parse_args_from_classes(
        BlueprintClass, ArchitectClass, CrowdProviderClass, task_args_string
    )

    # Values in extra_args override parsed values with the same key
    task_args.update(extra_args)

    # Load the classes to force argument validation before anything
    # is actually created in the database
    # TODO(#94) perhaps parse the arguments for these things one at a time?
    BlueprintClass.assert_task_args(task_args)
    ArchitectClass.assert_task_args(task_args)
    CrowdProviderClass.assert_task_args(task_args)

    # Find an existing task or create a new one
    task_name = task_args.get("task_name")
    if task_name is None:
        task_name = type_args.blueprint_type
        logger.warning(
            f"Task is using the default blueprint name {task_name} as a name, as no task_name is provided"
        )
    tasks = self.db.find_tasks(task_name=task_name)
    task_id = None
    if len(tasks) == 0:
        task_id = self.db.new_task(task_name, type_args.blueprint_type)
    else:
        task_id = tasks[0].db_id

    logger.info(f"Creating a task run under task name: {task_name}")

    # Create a new task run
    # The unparsed task arguments are stored shell-quoted and space-joined
    new_run_id = self.db.new_task_run(
        task_id,
        requester_id,
        " ".join([shlex.quote(x) for x in task_args_string]),
        provider_type,
        type_args.blueprint_type,
        requester.is_sandbox(),
    )
    task_run = TaskRun(self.db, new_run_id)

    try:
        # If anything fails after here, we have to cleanup the architect
        build_dir = os.path.join(task_run.get_run_dir(), "build")
        os.makedirs(build_dir, exist_ok=True)
        architect = ArchitectClass(self.db, task_args, task_run, build_dir)

        # Register the blueprint with args to the task run,
        # ensure cached
        blueprint = BlueprintClass(task_run, task_args)
        task_run.get_blueprint(opts=task_args)

        # Setup and deploy the server
        # NOTE(review): built_dir is not read again anywhere in this function
        built_dir = architect.prepare()
        task_url = architect.deploy()

        # TODO(#102) maybe the cleanup (destruction of the server configuration?) should only
        # happen after everything has already been reviewed, this way it's possible to
        # retrieve the exact build directory to review a task for real
        architect.cleanup()

        # Create the backend runner
        task_runner = BlueprintClass.TaskRunnerClass(task_run, task_args)

        # Small hack for auto appending block qualification
        existing_qualifications = task_args.get("qualifications", [])
        if task_args.get("block_qualification") is not None:
            existing_qualifications.append(
                make_qualification_dict(
                    task_args["block_qualification"], QUAL_NOT_EXIST, None
                )
            )
        if task_args.get("onboarding_qualification") is not None:
            existing_qualifications.append(
                make_qualification_dict(
                    OnboardingRequired.get_failed_qual(
                        task_args["onboarding_qualification"]
                    ),
                    QUAL_NOT_EXIST,
                    None,
                )
            )
        # Write the list back, since the default [] above is not stored in task_args
        task_args["qualifications"] = existing_qualifications

        # Register the task with the provider
        provider = CrowdProviderClass(self.db)
        provider.setup_resources_for_task_run(task_run, task_args, task_url)

        initialization_data_array = blueprint.get_initialization_data()

        # Link the job together
        job = self.supervisor.register_job(
            architect, task_runner, provider, existing_qualifications
        )
        if self.supervisor.sending_thread is None:
            self.supervisor.launch_sending_thread()
    except (KeyboardInterrupt, Exception) as e:
        logger.error(
            "Encountered error while launching run, shutting down", exc_info=True
        )
        try:
            # NOTE(review): architect is unbound if the failure happened before
            # it was constructed; the resulting error is caught and logged below
            architect.shutdown()
        except (KeyboardInterrupt, Exception) as architect_exception:
            logger.exception(
                f"Could not shut down architect: {architect_exception}",
                exc_info=True,
            )
        raise e

    launcher = TaskLauncher(self.db, task_run, initialization_data_array)
    launcher.create_assignments()
    launcher.launch_units(task_url)

    # Keep the pieces of this run together, keyed by the run's db_id
    self._task_runs_tracked[task_run.db_id] = TrackedRun(
        task_run=task_run,
        task_launcher=launcher,
        task_runner=task_runner,
        architect=architect,
        job=job,
    )
    return task_run.db_id
def validate_and_run_config_or_die(
    self,
    run_config: DictConfig,
    shared_state: Optional[SharedTaskState] = None,
) -> str:
    """
    Validate the given run config and launch a job from it.

    Steps, in order:
      1. Look up the requester named in run_config.provider and check that
         its provider type matches run_config.provider._provider_type.
      2. Resolve the blueprint, architect and crowd provider classes and run
         each class's assert_task_args on the config and shared state.
      3. Find the task by name or create it, then create a new task run that
         stores the resolved config as JSON.
      4. Build the architect, blueprint, task runner and provider, deploy the
         server, and register the job with the supervisor.
      5. Create a TaskLauncher, create assignments, launch units, track the
         run in self._task_runs_tracked and update its completion progress.

    shared_state defaults to a fresh SharedTaskState() when None is passed.

    Returns the db_id of the newly created task run.

    Raises EntryDoesNotExistException when no requester has the given name and
    that name is not "MOCK_REQUESTER". The provider type check is an assert.
    Any exception (or KeyboardInterrupt) raised during step 4 is logged, a
    shutdown of the architect is attempted, and the exception is re-raised.
    """
    if shared_state is None:
        shared_state = SharedTaskState()

    # First try to find the requester:
    requester_name = run_config.provider.requester_name
    requesters = self.db.find_requesters(requester_name=requester_name)
    if len(requesters) == 0:
        # The mock requester is obtained through get_mock_requester when the db has none
        if run_config.provider.requester_name == "MOCK_REQUESTER":
            requesters = [get_mock_requester(self.db)]
        else:
            raise EntryDoesNotExistException(
                f"No requester found with name {requester_name}")
    requester = requesters[0]
    requester_id = requester.db_id
    provider_type = requester.provider_type
    assert provider_type == run_config.provider._provider_type, (
        f"Found requester for name {requester_name} is not "
        f"of the specified type {run_config.provider._provider_type}, "
        f"but is instead {provider_type}.")

    # Next get the abstraction classes, and run validation
    # before anything is actually created in the database
    blueprint_type = run_config.blueprint._blueprint_type
    architect_type = run_config.architect._architect_type
    BlueprintClass = get_blueprint_from_type(blueprint_type)
    ArchitectClass = get_architect_from_type(architect_type)
    CrowdProviderClass = get_crowd_provider_from_type(provider_type)

    BlueprintClass.assert_task_args(run_config, shared_state)
    ArchitectClass.assert_task_args(run_config, shared_state)
    CrowdProviderClass.assert_task_args(run_config, shared_state)

    # Find an existing task or create a new one
    task_name = run_config.task.get("task_name", None)
    if task_name is None:
        task_name = blueprint_type
        logger.warning(
            f"Task is using the default blueprint name {task_name} as a name, "
            "as no task_name is provided")
    tasks = self.db.find_tasks(task_name=task_name)
    task_id = None
    if len(tasks) == 0:
        task_id = self.db.new_task(task_name, blueprint_type)
    else:
        task_id = tasks[0].db_id

    logger.info(f"Creating a task run under task name: {task_name}")

    # Create a new task run
    # The config is resolved to a plain container and stored as a JSON string
    new_run_id = self.db.new_task_run(
        task_id,
        requester_id,
        json.dumps(OmegaConf.to_container(run_config, resolve=True)),
        provider_type,
        blueprint_type,
        requester.is_sandbox(),
    )
    task_run = TaskRun(self.db, new_run_id)

    try:
        # If anything fails after here, we have to cleanup the architect
        build_dir = os.path.join(task_run.get_run_dir(), "build")
        os.makedirs(build_dir, exist_ok=True)
        architect = ArchitectClass(self.db, run_config, shared_state, task_run, build_dir)

        # Register the blueprint with args to the task run,
        # ensure cached
        blueprint = BlueprintClass(task_run, run_config, shared_state)
        task_run.get_blueprint(args=run_config, shared_state=shared_state)

        # Setup and deploy the server
        # NOTE(review): built_dir is not read again anywhere in this function
        built_dir = architect.prepare()
        task_url = architect.deploy()

        # TODO(#102) maybe the cleanup (destruction of the server configuration?) should only
        # happen after everything has already been reviewed, this way it's possible to
        # retrieve the exact build directory to review a task for real
        architect.cleanup()

        # Create the backend runner
        task_runner = BlueprintClass.TaskRunnerClass(
            task_run, run_config, shared_state)

        # Small hack for auto appending block qualification
        existing_qualifications = shared_state.qualifications
        if run_config.blueprint.get("block_qualification", None) is not None:
            existing_qualifications.append(
                make_qualification_dict(
                    run_config.blueprint.block_qualification, QUAL_NOT_EXIST, None))
        if run_config.blueprint.get("onboarding_qualification", None) is not None:
            existing_qualifications.append(
                make_qualification_dict(
                    OnboardingRequired.get_failed_qual(
                        run_config.blueprint.onboarding_qualification,
                    ),
                    QUAL_NOT_EXIST,
                    None,
                ))
        shared_state.qualifications = existing_qualifications

        # Register the task with the provider
        provider = CrowdProviderClass(self.db)
        provider.setup_resources_for_task_run(task_run, run_config, task_url)

        initialization_data_array = blueprint.get_initialization_data()

        # Link the job together
        job = self.supervisor.register_job(architect, task_runner, provider, existing_qualifications)
        if self.supervisor.sending_thread is None:
            self.supervisor.launch_sending_thread()
    except (KeyboardInterrupt, Exception) as e:
        logger.error(
            "Encountered error while launching run, shutting down",
            exc_info=True)
        try:
            # NOTE(review): architect is unbound if the failure happened before
            # it was constructed; the resulting error is caught and logged below
            architect.shutdown()
        except (KeyboardInterrupt, Exception) as architect_exception:
            logger.exception(
                f"Could not shut down architect: {architect_exception}",
                exc_info=True,
            )
        raise e

    launcher = TaskLauncher(self.db, task_run, initialization_data_array)
    launcher.create_assignments()
    launcher.launch_units(task_url)

    # Keep the pieces of this run together, keyed by the run's db_id
    self._task_runs_tracked[task_run.db_id] = TrackedRun(
        task_run=task_run,
        task_launcher=launcher,
        task_runner=task_runner,
        architect=architect,
        job=job,
    )
    task_run.update_completion_progress(status=False)

    return task_run.db_id
def get_provider(self) -> "CrowdProvider":
    """
    Return the crowd provider instance for this object's provider_type.

    The instance is built from self.db the first time it is requested and
    stored, so repeated calls hand back the same object.
    """
    cached = self.__crowd_provider
    if cached is None:
        cached = get_crowd_provider_from_type(self.provider_type)(self.db)
        self.__crowd_provider = cached
    return cached
def get_help_arguments(args):
    """
    Print help for one of the abstractions: blueprint, architect, requester,
    provider or task.

    `args[0]` has the form `<abstraction>` or `<abstraction>=<type>`.

    - With no arguments, a usage line is printed.
    - With an unknown abstraction, the valid abstractions are printed.
    - With a bare abstraction other than "task", a short description and the
      valid types are printed.
    - With "task" or `<abstraction>=<type>`, the argument table of the
      matching class is printed. Any further entries in `args` restrict the
      table to arguments with those names.
    - If the type cannot be resolved, the valid types are printed instead.
    """
    if len(args) == 0:
        click.echo(
            "Usage: mephisto wut <abstraction>[=<type>] [...specific args to check]"
        )
        return

    from mephisto.core.registry import (
        get_blueprint_from_type,
        get_crowd_provider_from_type,
        get_architect_from_type,
        get_valid_blueprint_types,
        get_valid_provider_types,
        get_valid_architect_types,
    )
    from mephisto.core.argparse_parser import get_extra_argument_dicts

    VALID_ABSTRACTIONS = [
        "blueprint", "architect", "requester", "provider", "task"
    ]

    abstraction_equal_split = args[0].split("=", 1)
    abstraction = abstraction_equal_split[0]

    if abstraction not in VALID_ABSTRACTIONS:
        click.echo(
            f"Given abstraction {abstraction} not in valid abstractions {VALID_ABSTRACTIONS}"
        )
        return

    if abstraction == "task":
        from mephisto.data_model.task_config import TaskConfig

        target_class = TaskConfig
    else:
        if len(abstraction_equal_split) == 1:
            # querying about the general abstraction
            if abstraction == "blueprint":
                click.echo(
                    f"The blueprint determines the task content. Valid blueprints are {get_valid_blueprint_types()}"
                )
                return
            elif abstraction == "architect":
                click.echo(
                    f"The architect determines the server where a task is hosted. Valid architects are {get_valid_architect_types()}"
                )
                return
            elif abstraction == "requester":
                click.echo(
                    f"The requester is an account for a crowd provider. Valid requester types are {get_valid_provider_types()}. \n"
                    "Use `mephisto requesters` to see registered requesters, and `mephisto register <requester type>` to register."
                )
                return
            elif abstraction == "provider":
                click.echo(
                    f"The crowd provider determines the source of the crowd workers. Valid provider are {get_valid_provider_types()}"
                )
                return

        # There's a specific abstraction to check
        abstract_value = abstraction_equal_split[1]
        target_class = None
        valid = None
        # A failed lookup is reported by listing the valid types. Only
        # Exception is caught so Ctrl-C and SystemExit still propagate.
        if abstraction == "blueprint":
            try:
                target_class = get_blueprint_from_type(abstract_value)
            except Exception:
                valid = get_valid_blueprint_types()
        elif abstraction == "architect":
            try:
                target_class = get_architect_from_type(abstract_value)
            except Exception:
                valid = get_valid_architect_types()
        elif abstraction == "provider":
            try:
                target_class = get_crowd_provider_from_type(abstract_value)
            except Exception:
                valid = get_valid_provider_types()
        elif abstraction == "requester":
            try:
                target_class = get_crowd_provider_from_type(
                    abstract_value).RequesterClass
            except Exception:
                valid = get_valid_provider_types()
        if valid is not None:
            click.echo(
                f"The valid types for {abstraction} are {valid}. '{abstract_value}' not found."
            )
            return

    from tabulate import tabulate

    arg_dict = get_extra_argument_dicts(target_class)[0]
    click.echo(arg_dict["desc"])
    checking_args = arg_dict["args"]
    if len(args) > 1:
        # Only show the arguments that were asked about explicitly
        checking_args = {
            k: v for k, v in checking_args.items() if k in args[1:]
        }
    click.echo(tabulate(checking_args.values(), headers="keys"))
def requester_details(requester_type):
    """
    Return, as JSON, the argument descriptions of the RequesterClass that
    belongs to the given provider type.
    """
    RequesterClass = get_crowd_provider_from_type(requester_type).RequesterClass
    return jsonify(get_extra_argument_dicts(RequesterClass))
def get_crowd_provider_class(self) -> Type["CrowdProvider"]:
    """Resolve this object's provider_type to its CrowdProvider class."""
    provider_class = get_crowd_provider_from_type(self.provider_type)
    return provider_class