def from_parameters(params: Parameters) -> Optional["SpackConfiguration"]:
    """Build a ``SpackConfiguration`` from *params*, or return ``None``.

    Returns an environment-based configuration when the Spack environment
    parameter is present, a package-list configuration when the Spack
    packages parameter is present, and ``None`` when neither is given.

    Raises:
        RuntimeError: if both the environment and packages parameters are
            supplied, since they are mutually exclusive.
    """
    has_environment = SpackConfiguration.SPACK_ENVIRONMENT_PARAM in params
    has_packages = SpackConfiguration.SPACK_PACKAGES_PARAM in params

    # Check mutual exclusion once, up front.  In the original code the
    # equivalent check inside the `elif` branch was unreachable: that branch
    # only runs when the environment param is absent.
    if has_environment and has_packages:
        raise RuntimeError(
            f"{SpackConfiguration.SPACK_ENVIRONMENT_PARAM} "
            f"and {SpackConfiguration.SPACK_PACKAGES_PARAM} are mutually exclusive"
        )

    if has_environment:
        return SpackConfiguration(
            spack_root=params.existing_directory(SpackConfiguration.SPACK_ROOT_PARAM),
            spack_environment=params.string(
                SpackConfiguration.SPACK_ENVIRONMENT_PARAM
            ),
        )

    if has_packages:
        return SpackConfiguration(
            spack_root=params.existing_directory(SpackConfiguration.SPACK_ROOT_PARAM),
            spack_packages=[
                SpackPackage.parse(package_specifier)
                for package_specifier in params.arbitrary_list(
                    SpackConfiguration.SPACK_PACKAGES_PARAM
                )
            ],
        )

    # Neither Spack parameter present: no Spack configuration requested.
    return None
def main(params: Parameters):
    """Flatten the ACE corpus and generate one annotation project per (user, event).

    An ``event_list`` entry of ``"All"`` expands to every event present in the
    flattened corpus mapping.
    """
    # List of the six ACE corpus /adj/ folders (one for each type: bc, bn, cts, nw, un, wl)
    corpus_paths = params.arbitrary_list("corpus_paths")
    # Path to the project config file template (json file)
    json_template_path = params.existing_file("json_template_path")
    # Path to the cached_annotation_ser directory
    annotation_ser_path = params.existing_directory("annotation_ser_path")
    # Path to the cached_xmi directory
    cached_xmi_path = params.existing_directory("cached_xmi_path")
    # Path to target corpus (narrowed ACE-Corpus)
    cached_ace_data_path = params.creatable_directory("cached_ace_data_path")
    # List of users (strings)
    user_list = params.arbitrary_list("user_list")
    # List of event types (Format: "EVENT_TYPE.SUBTYPE" strings)
    event_list = params.arbitrary_list("event_list")
    # Output directory path where configured projects are moved to (use an empty directory)
    output_dir_path = params.creatable_directory("output_dir_path")

    flatten_ace_data(corpus_paths, cached_ace_data_path)
    complete_map = get_complete_project_to_doc_mapping(cached_ace_data_path)

    for user in user_list:
        for event_type in event_list:
            # "All" means one project per event in the corpus; otherwise just
            # the named event.  Both cases share the identical generation call,
            # which the original duplicated verbatim in each branch.
            event_names = list(complete_map) if event_type == "All" else [event_type]
            for event_name in event_names:
                configure_and_generate_project(
                    json_template_path=json_template_path,
                    event_name=event_name,
                    user_name=user,
                    event_doc_map=complete_map,
                    cached_ser_path=annotation_ser_path,
                    cached_xmi_path=cached_xmi_path,
                    output_dir_path=output_dir_path,
                )
def main(params: Parameters):
    """Run the M13 experiments selected by the boolean ``include_*`` parameters.

    Each selected experiment's param file is validated to exist before any
    experiment is run, so the user fails fast instead of partway through.
    """
    adam_root = params.existing_directory("adam_root")
    m13_experiments_dir = adam_root / "parameters" / "experiments" / "m13"

    # (boolean parameter name, param file name, default) — data-driven to
    # replace the original's thirteen near-identical if-blocks.
    experiment_options = (
        ("include_objects", "objects.params", True),
        ("include_imprecise_size", "imprecise_size.params", True),
        ("include_imprecise_temporal", "imprecise_temporal.params", True),
        ("include_subtle_verb", "subtle_verb.params", True),
        ("include_object_restrictions", "object_restrictions.params", True),
        ("include_functionally_defined_objects", "functionally_defined_objects.params", True),
        ("include_relations", "relations.params", True),
        ("include_generics", "generics.params", True),
        ("include_verbs_with_dynamic_prepositions", "events_with_dynamic_prepositions.params", True),
        ("include_m9_complete", "m9_complete.params", False),
        ("include_m13_complete", "m13_complete.params", False),
        ("include_m13_shuffled", "m13_shuffled.params", False),
        # This activates a special "debug" curriculum, which is meant to be
        # edited in the code by a developer to do fine-grained debugging.
        ("include_debug", "debug.params", False),
    )
    param_files: List[Path] = [
        m13_experiments_dir / file_name
        for option_name, file_name, default in experiment_options
        if params.boolean(option_name, default=default)
    ]

    # If any of the param files don't exist, bail out earlier instead of making
    # the user wait for the error.
    for param_file in param_files:
        if not param_file.exists():
            raise RuntimeError(
                f"Expected param file {param_file} does not exist")

    for param_file in param_files:
        logging.info("Running %s", param_file)
        experiment_params = YAMLParametersLoader().load(param_file)
        log_experiment_entry_point(experiment_params)
def main(params: Parameters):
    """Run the M9 experiments selected by the boolean ``include_*`` parameters.

    Validates that every selected param file exists before running anything,
    so the user fails fast instead of partway through.
    """
    adam_root = params.existing_directory("adam_root")
    m9_experiments_dir = adam_root / "parameters" / "experiments" / "m9"

    # (boolean parameter name, param file name, default) — data-driven to
    # replace the original's run of near-identical if-blocks.
    experiment_options = (
        ("include_objects", "objects.params", True),
        ("include_attributes", "attributes.params", True),
        ("include_relations", "relations.params", True),
        ("include_events", "events.params", True),
        # This activates a special "debug" curriculum, which is meant to be
        # edited in the code by a developer to do fine-grained debugging.
        ("include_debug", "debug.params", False),
    )
    param_files: List[Path] = [
        m9_experiments_dir / file_name
        for option_name, file_name, default in experiment_options
        if params.boolean(option_name, default=default)
    ]

    # If any of the param files don't exist, bail out earlier instead of making
    # the user wait for the error.
    for param_file in param_files:
        if not param_file.exists():
            raise RuntimeError(
                f"Expected param file {param_file} does not exist")

    for param_file in param_files:
        logging.info("Running %s", param_file)
        experiment_params = YAMLParametersLoader().load(param_file)
        log_experiment_entry_point(experiment_params)
def from_parameters(params: Parameters) -> Optional["CondaConfiguration"]:
    """Construct a ``CondaConfiguration`` from *params*.

    Returns ``None`` when no conda environment parameter is present.
    """
    # Guard clause: nothing to build without a conda environment name.
    if CondaConfiguration.CONDA_ENVIRONMENT_PARAM not in params:
        return None
    return CondaConfiguration(
        conda_base_path=params.existing_directory("conda_base_path"),
        conda_environment=params.string(CondaConfiguration.CONDA_ENVIRONMENT_PARAM),
    )
def main(params: Parameters) -> None:
    """Copy every regular file under ``input_directory`` into a byte key-value sink.

    Files are discovered recursively; each file's key is computed by the
    configured key function and its raw bytes become the value.
    """
    # These local names are handed to the sink via eval_context=locals() and
    # presumably may be referenced by name in configuration — do not rename.
    input_directory = params.existing_directory("input_directory")
    key_function = key_function_from_params(params)
    with byte_key_value_sink_from_params(params, eval_context=locals()) as sink:
        for candidate in input_directory.rglob("*"):
            if not candidate.is_file():
                continue
            logging.info("Copying %s to output sink", candidate)
            sink.put(key=key_function(candidate), value=candidate.read_bytes())
def main(params: Parameters):
    """Run the single object_restrictions experiment."""
    adam_root = params.existing_directory("adam_root")
    param_file = (
        adam_root / "parameters" / "experiments" / "object_restrictions.params"
    )
    # Fail fast if the param file is missing.
    if not param_file.exists():
        raise RuntimeError(f"Expected param file {param_file} does not exist")
    logging.info("Running %s", param_file)
    log_experiment_entry_point(YAMLParametersLoader().load(param_file))
def main(params: Parameters):
    """Run the fixed set of M6 experiments in sequence."""
    adam_root = params.existing_directory("adam_root")
    m6_experiments_dir = adam_root / "parameters" / "experiments" / "m6"
    experiment_file_names = (
        "each-object-by-itself.pursuit.params",
        "pursuit-single-noise.params",
        "static-prepositions.params",
        "pursuit-double-noise.params",
    )
    param_files = [m6_experiments_dir / name for name in experiment_file_names]

    # If any of the param files don't exist, bail out earlier instead of making
    # the user wait for the error.
    for param_file in param_files:
        if not param_file.exists():
            raise RuntimeError(
                f"Expected param file {param_file} does not exist")

    for param_file in param_files:
        logging.info("Running %s", param_file)
        experiment_params = YAMLParametersLoader().load(param_file)
        log_experiment_entry_point(experiment_params)
def main(params: Parameters):
    """Render curriculum scenes with the situation visualizer.

    Two modes, chosen by whether ``experiment_group_dir`` is supplied:
    with it, renders the curricula of the selected M9 experiment param files
    into per-experiment ``<experiment>/renders`` subdirectories; without it,
    renders the full phase-1 curriculum into ``screenshot_directory``.
    """
    viz = SituationVisualizer()
    # try to get the directory for rendering for an experiment
    adam_root = params.existing_directory("adam_root")
    root_output_directory = params.optional_creatable_directory(
        "experiment_group_dir")
    if root_output_directory is not None:
        m9_experiments_dir = adam_root / "parameters" / "experiments" / "m9"
        param_files: List[Path] = []
        if params.boolean("include_objects"):
            param_files.append(m9_experiments_dir / "objects.params")
        if params.boolean("include_attributes"):
            param_files.append(m9_experiments_dir / "attributes.params")
        if params.boolean("include_relations"):
            param_files.append(m9_experiments_dir / "relations.params")
        if params.boolean("include_events"):
            param_files.append(m9_experiments_dir / "events.params")
        # This activates a special "debug" curriculum,
        # which is meant to be edited in the code by a developer to do fine-grained debugging.
        if params.boolean("include_debug", default=False):
            param_files.append(m9_experiments_dir / "debug.params")
        # loop over all experiment params files
        for param_file in param_files:
            experiment_params = YAMLParametersLoader().load(param_file)
            if "curriculum" in experiment_params:
                # get the experiment curriculum list (if there is one)
                curriculum = curriculum_from_params(experiment_params)[0]
                directory_name = experiment_params.string(
                    "experiment") + "/renders"
                # Only the leaf directory is created; os.mkdir (unlike
                # os.makedirs) requires the parent to already exist.
                if not os.path.isdir(root_output_directory / directory_name):
                    os.mkdir(root_output_directory / directory_name)
                for instance_group in curriculum:
                    try:
                        make_scenes(
                            params,
                            [instance_group],
                            root_output_directory / directory_name,
                            viz,
                        )
                    except RuntimeError as err:
                        # NOTE(review): the error is printed and rendering
                        # continues with the next instance group — presumably
                        # deliberate best-effort behavior; confirm.
                        print(f"uncaught exception: {err}")
    else:
        # render phase 1 scenes:
        # root_output_directory is rebound here to a different parameter.
        root_output_directory = params.optional_creatable_directory(
            "screenshot_directory")
        assert root_output_directory is not None
        if not os.path.isdir(root_output_directory):
            os.mkdir(root_output_directory)
        for idx, instance_group in enumerate(
                build_curriculum(None, None,
                                 GAILA_PHASE_1_LANGUAGE_GENERATOR)):
            # do any filtering here
            if instance_group.name() in EXCLUDED_CURRICULA:
                continue
            # Zero-padded index keeps directories in curriculum order.
            directory_name = f"{idx:03}-{instance_group.name()}"
            if not os.path.isdir(root_output_directory / directory_name):
                os.mkdir(root_output_directory / directory_name)  # type: ignore
            # then call some function from make_scenes.py to run the curriculum
            make_scenes(params, [instance_group],
                        root_output_directory / directory_name, viz)
def main(params: Parameters):
    """Run the M13 experiment suite, either locally or as a Pegasus workflow.

    With ``use_pegasus`` set, each selected experiment becomes a Pegasus job
    (with logging/checkpointing parameters unified in) and a workflow
    description is written at the end; otherwise experiments run in-process.
    """
    adam_root = params.existing_directory("adam_root")
    m13_experiments_dir = adam_root / "parameters" / "experiments" / "m13"
    use_pegasus = params.boolean("use_pegasus", default=False)
    if use_pegasus:
        initialize_vista_pegasus_wrapper(params)

    # (boolean parameter name, param file name, default) — data-driven to
    # replace the original's thirteen near-identical if-blocks.
    experiment_options = (
        ("include_objects", "objects.params", True),
        ("include_imprecise_size", "imprecise_size.params", True),
        ("include_imprecise_temporal", "imprecise_temporal.params", True),
        ("include_subtle_verb", "subtle_verb.params", True),
        ("include_object_restrictions", "object_restrictions.params", True),
        ("include_functionally_defined_objects", "functionally_defined_objects.params", True),
        ("include_relations", "relations.params", True),
        ("include_generics", "generics.params", True),
        ("include_verbs_with_dynamic_prepositions", "events_with_dynamic_prepositions.params", True),
        ("include_m9_complete", "m9_complete.params", False),
        ("include_m13_complete", "m13_complete.params", False),
        ("include_m13_shuffled", "m13_shuffled.params", False),
        # This activates a special "debug" curriculum, which is meant to be
        # edited in the code by a developer to do fine-grained debugging.
        ("include_debug", "debug.params", False),
    )
    param_files: List[Path] = [
        m13_experiments_dir / file_name
        for option_name, file_name, default in experiment_options
        if params.boolean(option_name, default=default)
    ]

    # If any of the param files don't exist, bail out earlier instead of making
    # the user wait for the error.
    for param_file in param_files:
        if not param_file.exists():
            raise RuntimeError(f"Expected param file {param_file} does not exist")

    for param_file in param_files:
        logging.info("Running %s", param_file)
        experiment_params = YAMLParametersLoader().load(param_file)
        if not use_pegasus:
            log_experiment_entry_point(experiment_params)
        else:
            experiment_name = Locator(experiment_params.string("experiment"))
            experiment_params = experiment_params.unify(
                {
                    "experiment_group_dir": directory_for(experiment_name) / "output",
                    "hypothesis_log_dir": directory_for(experiment_name) / "hypotheses",
                    # State pickles will go under experiment_name/learner_state
                    "learner_logging_path": directory_for(experiment_name),
                    "log_learner_state": True,
                    "resume_from_latest_logged_state": True,
                    "log_hypothesis_every_n_steps": params.integer(
                        "save_state_every_n_steps"
                    ),
                    "debug_learner_pickling": params.boolean(
                        "debug_learner_pickling", default=False
                    ),
                }
            )
            run_python_on_parameters(
                experiment_name, log_experiment_script, experiment_params, depends_on=[]
            )

    if use_pegasus:
        write_workflow_description()
def example_workflow(params: Parameters):  # pragma: no cover
    """
    An example script to generate a container workflow for submission to Pegasus.

    Builds a Docker image, starts a MongoDB container as a service, runs an
    add-then-sort pipeline over a generated number file inside the built
    container, stops the service, and writes the Pegasus workflow description.
    """
    tmp_path = params.creatable_directory("example_root_dir")
    docker_tar = params.creatable_file("docker_tar")
    docker_build_dir = params.existing_directory("docker_build_dir")
    docker_image_name = params.string(
        "docker_image_name", default="pegasus_wrapper_container_demo"
    )
    docker_image_tag = params.string("docker_image_tag", default="0.2")
    mongo_db_tar = params.string(
        "mongo_db_tar", default="/nas/gaia/shared/cluster/docker/mongo-4.4.tar"
    )
    # NOTE(review): "monogo" is a typo for "mongo" in this local name.
    monogo_db_data = "/scratch/dockermount/pegasus_wrapper_tmp/data"
    mongo_db_config = "/scratch/dockermount/pegasus_wrapper_tmp/config"
    # Generating parameters for initializing a workflow
    # We recommend making workflow directory, site, and partition parameters
    # in an research workflow
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "home_dir": str(tmp_path),
            "partition": "scavenge",
        }
    )
    # Resource request pinning jobs to the saga31 node on the gaia partition.
    saga31_request = SlurmResourceRequest.from_parameters(
        Parameters.from_mapping({"run_on_single_node": "saga31", "partition": "gaia"})
    )
    # User-supplied params override/extend the defaults above.
    workflow_params = workflow_params.unify(params)
    # Our source input for the sample jobs
    input_file = tmp_path / "raw_nums.txt"
    add_y_output_file_nas = tmp_path / "nums_y.txt"
    sorted_output_file_nas = tmp_path / "sorted.txt"
    # Fixed seed makes the generated input deterministic across runs.
    random = Random()
    random.seed(0)
    nums = [int(random.random() * 100) for _ in range(0, 25)]
    # Base Job Locator
    job_locator = Locator(("jobs",))
    docker_python_root = Path("/home/app/")
    job_profile = PegasusProfile(
        namespace="pegasus", key="transfer.bypass.input.staging", value="True"
    )
    # Write a list of numbers out to be able to run the workflow
    with input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)
    initialize_vista_pegasus_wrapper(workflow_params)
    # Build the demo image, save it both to the scratch cache and to the
    # requested tar path, and pre-load the MongoDB image and its host dirs.
    build_container = run_bash(
        job_locator / "build_docker",
        command=[
            "mkdir -p /scratch/dockermount/pegasus_wrapper_tmp",
            f"cd {docker_build_dir}",
            f"docker build . -t {docker_image_name}:{docker_image_tag}",
            f"docker save -o /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_image_name}:{docker_image_tag}",
            f"cp /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_tar.absolute()}",
            f"chmod go+r {docker_tar.absolute()}",
            f"docker load --input {mongo_db_tar}",
            f"mkdir -p {monogo_db_data}",
            f"mkdir -p {mongo_db_config}",
        ],
        depends_on=[],
        resource_request=saga31_request,
    )
    python36 = add_container(
        f"{docker_image_name}:{docker_image_tag}",
        "docker",
        str(docker_tar.absolute()),
        image_site="saga",
        bypass_staging=True,
    )
    mongo4_4 = add_container(
        "mongo:4.4", "docker", mongo_db_tar, image_site="saga", bypass_staging=True
    )
    # NOTE(review): the second mount has no ":" between host and container
    # paths ("{mongo_db_config}/etc/custom"); possibly intended to be
    # f"{mongo_db_config}:/etc/custom" — confirm against the wrapper's mount
    # syntax.  Also, docker_args uses an f-string with no placeholders.
    start_mongo = start_docker_as_service(
        mongo4_4,
        depends_on=[build_container],
        mounts=[f"{monogo_db_data}:/data/db", f"{mongo_db_config}/etc/custom"],
        docker_args=f"-p 27017:27017",
        resource_request=saga31_request,
    )
    # Adds 10 to each input number inside the built container.
    add_y_job = run_python_on_args(
        job_locator / "add",
        docker_python_root / "add_y.py",
        set_args=f"{input_file} {add_y_output_file_nas} --y 10",
        depends_on=[build_container],
        job_profiles=[job_profile],
        resource_request=saga31_request,
        input_file_paths=[input_file],
        output_file_paths=[add_y_output_file_nas],
    )
    # Sorts the add-job output; note file paths are passed bare here rather
    # than wrapped in lists as in the job above.
    sort_job = run_python_on_parameters(
        job_locator / "sort",
        sort_nums_in_file,
        {"input_file": add_y_output_file_nas, "output_file": sorted_output_file_nas},
        depends_on=[add_y_job],
        container=python36,
        job_profiles=[job_profile],
        resource_request=saga31_request,
        input_file_paths=add_y_output_file_nas,
        output_file_paths=sorted_output_file_nas,
    )
    # Shut the MongoDB service down only after the pipeline has finished.
    _ = stop_docker_as_service(
        mongo4_4, depends_on=[start_mongo, sort_job], resource_request=saga31_request
    )
    # Generate the Pegasus DAX file & a Submit Script
    write_workflow_description(tmp_path)
def from_parameters(params: Parameters) -> KeyValueSink[str, bytes]:
    """
    Create a key-value sink writing to a directory.
    """
    output_directory = params.existing_directory("path")
    return _DirectoryBytesKeyValueSink(output_directory)