def main(params: Parameters):
    # List of the six ACE corpus /adj/ folders (one for each type: bc, bn, cts, nw, un, wl)
    corpus_paths = params.arbitrary_list("corpus_paths")
    # Path to the project config file template (json file)
    json_template_path = params.existing_file("json_template_path")
    # Path to the cached_annotation_ser directory
    annotation_ser_path = params.existing_directory("annotation_ser_path")

    # Path to the cached_xmi directory
    cached_xmi_path = params.existing_directory("cached_xmi_path")

    # Path to target corpus (narrowed ACE-Corpus)
    cached_ace_data_path = params.creatable_directory("cached_ace_data_path")

    # List of users (strings)
    user_list = params.arbitrary_list("user_list")
    # List of event types (format: "EVENT_TYPE.SUBTYPE" strings)
    event_list = params.arbitrary_list("event_list")

    # Output directory path where configured projects are moved to (use an empty directory)
    output_dir_path = params.creatable_directory("output_dir_path")

    flatten_ace_data(corpus_paths, cached_ace_data_path)

    complete_map = get_complete_project_to_doc_mapping(cached_ace_data_path)

    for user in user_list:
        for event_type in event_list:
            # "All" expands to one project per event type in the corpus
            if event_type == "All":
                for event in complete_map:
                    configure_and_generate_project(
                        json_template_path=json_template_path,
                        event_name=event,
                        user_name=user,
                        event_doc_map=complete_map,
                        cached_ser_path=annotation_ser_path,
                        cached_xmi_path=cached_xmi_path,
                        output_dir_path=output_dir_path)
            else:
                configure_and_generate_project(
                    json_template_path=json_template_path,
                    event_name=event_type,
                    user_name=user,
                    event_doc_map=complete_map,
                    cached_ser_path=annotation_ser_path,
                    cached_xmi_path=cached_xmi_path,
                    output_dir_path=output_dir_path)
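
For reference, a minimal sketch of a parameter mapping that could drive this
entry point, built with Parameters.from_mapping as in the later examples; every
path and value below is a hypothetical placeholder.

# Hypothetical invocation sketch; all paths and values are placeholders.
example_params = Parameters.from_mapping({
    "corpus_paths": ["/data/ace/bc/adj/", "/data/ace/bn/adj/"],
    "json_template_path": "/configs/project_template.json",
    "annotation_ser_path": "/cache/cached_annotation_ser",
    "cached_xmi_path": "/cache/cached_xmi",
    "cached_ace_data_path": "/cache/ace_data",
    "user_list": ["annotator1", "annotator2"],
    "event_list": ["Conflict.Attack", "Movement.Transport"],
    "output_dir_path": "/output/projects",
})
main(example_params)
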
Example 2
def main(params: Parameters):
    curriculum_repository_path = params.creatable_directory(
        CURRICULUM_REPOSITORY_PATH_PARAMETER
    )
    language_mode = params.enum(
        LANGUAGE_MODE_PARAMETER, LanguageMode, default=LanguageMode.ENGLISH
    )

    train_curriculum, test_curriculum = curriculum_from_params(
        params, language_mode=language_mode
    )
    strict_curriculum = ExperimentCurriculum(
        evaluate_curriculum(train_curriculum), evaluate_curriculum(test_curriculum)
    )
    write_experiment_curriculum(
        curriculum_repository_path,
        params,
        language_mode,
        strict_curriculum,
        ignored_parameters=immutableset(
            IGNORED_PARAMETERS.union(
                {CURRICULUM_REPOSITORY_PATH_PARAMETER, LANGUAGE_MODE_PARAMETER}
            )
        ),
    )
Example 3
    def from_parameters(params: Parameters) -> "WorkflowBuilder":
        wb = WorkflowBuilder(
            name=params.string("workflow_name", default="Workflow"),
            created_by=params.string("workflow_created",
                                     default="Default Constructor"),
            workflow_directory=params.creatable_directory(
                "workflow_directory"),
            default_site=params.string("site"),
            conda_script_generator=CondaJobScriptGenerator.from_parameters(
                params),
            docker_script_generator=DockerJobScriptGenerator.from_parameters(
                params),
            namespace=params.string("namespace"),
            default_resource_request=ResourceRequest.from_parameters(params),
            data_configuration=params.string("data_configuration",
                                             default="sharedfs"),
            experiment_name=params.string("experiment_name", default=""),
        )

        if params.boolean("include_nas", default=True):
            add_local_nas_to_sites(
                wb._sites_catalog,
                params  # pylint: disable=protected-access
            )
        if params.boolean("include_saga", default=True):
            add_saga_cluster_to_sites(
                wb._sites_catalog,
                params  # pylint: disable=protected-access
            )
            configure_saga_properities(
                wb._properties,
                params  # pylint: disable=protected-access
            )

        return wb
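
A hedged usage sketch: assuming from_parameters is exposed as a static or class
method on WorkflowBuilder, construction from a loaded parameter file might look
like this.

# Assumes from_parameters is reachable on WorkflowBuilder and that params
# supplies at least "workflow_directory", "site", and "namespace".
wb = WorkflowBuilder.from_parameters(params)
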
Example 4
def main(params: Parameters):
    # create_cas_from_apf(TEST_APF_PATH, TEST_SGM_PATH, OUTPUT_DIR_PATH)
    corpus_paths = params.arbitrary_list("corpus_paths")
    output_xmi_dir_path = params.creatable_directory("cached_xmi_path")
    type_system_path = params.existing_file("type_system_path")
    cas_xmi_template_path = params.existing_file("cas_xmi_template_path")

    # Load Typesystem
    with type_system_path.open('rb') as file:
        typesystem = load_typesystem(file)

    # Load xmi_template
    with cas_xmi_template_path.open('rb') as cas_xmi_file:
        cas_template = load_cas_from_xmi(cas_xmi_file, typesystem=typesystem)

    for ace_corpus_path in corpus_paths:
        print('Processing apf files from: ' + ace_corpus_path)
        start_time = time.perf_counter()
        for filename in os.listdir(ace_corpus_path):
            if filename.endswith(".apf.xml"):
                print("Processing " + filename)
                # use os.path.join rather than string concatenation, which
                # breaks when the corpus path lacks a trailing separator
                create_cas_from_apf(apf_filename=filename,
                                    apf_path=os.path.join(ace_corpus_path, filename),
                                    source_sgm_path=os.path.join(
                                        ace_corpus_path,
                                        filename.replace(".apf.xml", ".sgm")),
                                    output_dir_path=output_xmi_dir_path,
                                    typesystem=typesystem,
                                    cas_template=cas_template)
        elapsed_time = time.perf_counter() - start_time
        print(f"Processing Completed. Time elapsed: {elapsed_time:0.4f} seconds")
Example 5
def main(params: Parameters) -> None:
    root_output_directory = params.creatable_directory("output_directory")
    curriculum_string = params.string("curriculum",
                                      valid_options=STR_TO_CURRICULUM.keys(),
                                      default="phase1")
    language_mode = params.enum("language_mode",
                                LanguageMode,
                                default=LanguageMode.ENGLISH)
    language_string = str(language_mode).split(".")[-1].lower()
    num_samples = params.optional_positive_integer("num_samples")
    num_noise_objects = params.optional_positive_integer("num_noise_objects")
    phase1_curriculum_dir = root_output_directory / language_string / curriculum_string
    phase1_curriculum_dir.mkdir(parents=True, exist_ok=True)
    # We lazily instantiate the curriculum so we don't need to worry
    # about any of them we don't actually use.
    curriculum_to_render = STR_TO_CURRICULUM[curriculum_string](
        num_samples, num_noise_objects,
        phase2_language_generator(language_mode))
    sort_by_utterance_length_flag = params.boolean("sort_by_utterance",
                                                   default=False)
    if sort_by_utterance_length_flag:
        random_seed = params.integer("random_seed", default=1)
        CurriculumToHtmlDumper().dump_to_html_as_sorted_by_utterance_length(
            curriculum_to_render,
            output_directory=phase1_curriculum_dir,
            title="GAILA Phase 1 Curriculum Sorted by Utterance Length",
            curriculum_string=curriculum_string,
            random_seed=random_seed,
        )
    else:
        CurriculumToHtmlDumper().dump_to_html(
            curriculum_to_render,
            output_directory=phase1_curriculum_dir,
            title="GAILA Phase 1 Curriculum",
        )
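
STR_TO_CURRICULUM is the mapping that makes the lazy instantiation above work:
curriculum names map to factory callables, so only the requested curriculum is
ever built. A hypothetical sketch of such a mapping; the real entries live in
the original module and the factory names below are placeholders.

# Hypothetical sketch; the factory names are placeholders, not the originals.
# Each factory is called as factory(num_samples, num_noise_objects, language_generator).
STR_TO_CURRICULUM = {
    "phase1": build_phase1_curriculum,  # placeholder
    "phase2": build_phase2_curriculum,  # placeholder
}
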
Example 6
    def from_parameters(params: Parameters) -> "SlurmPythonRunner":
        return SlurmPythonRunner(
            conda_config=CondaConfiguration.from_parameters(params),
            spack_config=SpackConfiguration.from_parameters(params),
            log_base_directory=params.creatable_directory(
                "log_directory").absolute(),
        )
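
A hedged usage sketch: construct the runner from a loaded parameter file,
assuming "log_directory" (plus whatever the conda and spack helpers read) is
present in params.

runner = SlurmPythonRunner.from_parameters(params)
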
Example 7
    def create_logger(params: Parameters) -> "LearningProgressHtmlLogger":
        output_dir = params.creatable_directory("experiment_group_dir")
        experiment_name = params.string("experiment")
        include_links_to_images = params.optional_boolean("include_image_links")
        num_pretty_descriptions = params.positive_integer(
            "num_pretty_descriptions", default=3
        )
        sort_by_length = params.boolean(
            "sort_learner_descriptions_by_length", default=False
        )

        logging_dir = output_dir / experiment_name
        logging_dir.mkdir(parents=True, exist_ok=True)
        output_html_path = str(logging_dir / "index.html")

        if include_links_to_images is None:
            include_links_to_images = False

        logging.info("Experiment will be logged to %s", output_html_path)

        with open(output_html_path, "w") as outfile:
            html_dumper = CurriculumToHtmlDumper()

            outfile.write(f"<head>\n\t<style>{CSS}\n\t</style>\n</head>")
            outfile.write(f"\n<body>\n\t<h1>{experiment_name}</h1>")
            # A JavaScript function to allow toggling perception information
            outfile.write(
                """
                <script>
                function myFunction(id) {
                  var x = document.getElementById(id);
                  if (x.style.display === "none") {
                    x.style.display = "block";
                  } else {
                    x.style.display = "none";
                  }
                }
                </script>
                """
            )
        return LearningProgressHtmlLogger(
            outfile_dir=output_html_path,
            html_dumper=html_dumper,
            include_links_to_images=include_links_to_images,
            num_pretty_descriptions=num_pretty_descriptions,
            sort_by_length=sort_by_length,
        )
Example 8
    def from_parameters(params: Parameters) -> "WorkflowBuilder":
        workflow_directory = params.creatable_directory("workflow_directory")

        replica_catalog = workflow_directory / "rc.dat"
        if replica_catalog.exists():
            replica_catalog.unlink()
        replica_catalog.touch(mode=0o744)

        return WorkflowBuilder(
            name=params.string("workflow_name", default="Workflow"),
            created_by=params.string("workflow_created",
                                     default="Default Constructor"),
            workflow_directory=workflow_directory,
            default_site=params.string("site"),
            conda_script_generator=CondaJobScriptGenerator.from_parameters(
                params),
            namespace=params.string("namespace"),
            default_resource_request=ResourceRequest.from_parameters(params),
            replica_catalog=replica_catalog,
        )
Example 9
def _split_into_even_slices(input_source: KeyValueSource[str, bytes],
                            params: Parameters):
    output_directory = params.creatable_directory("output_dir")
    slices = params.positive_integer("num_slices")
    random_seed = params.optional_positive_integer("random_seed")
    slice_paths = [
        output_directory / "{!s}.zip".format(i) for i in range(slices)
    ]
    CharSink.to_file(output_directory / "_slices.txt").write("\n".join(
        str(x) for x in slice_paths))
    output_sinks = [
        KeyValueSink.zip_bytes_sink(slice_path) for slice_path in slice_paths
    ]
    # this is the magic incantation for handling variable-length lists of context managers
    with ExitStack() as exit_stack:
        for output_sink in output_sinks:
            exit_stack.enter_context(output_sink)
        # sort keys to guarantee a deterministic iteration order
        input_keys = sorted(input_source.keys())  # type: ignore
        if random_seed:
            random.seed(random_seed)
            random.shuffle(input_keys)
        for (i, k) in enumerate(input_keys):
            output_sinks[i % slices].put(k, input_source[k])
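
The i % slices indexing distributes keys round-robin, so slice sizes differ by
at most one. A standalone sketch of the same pattern:

# Round-robin distribution sketch: with 10 keys and 3 slices, the slices
# receive 4, 3, and 3 keys respectively.
keys = [f"doc{n}" for n in range(10)]
num_slices = 3
buckets = [[] for _ in range(num_slices)]
for i, k in enumerate(keys):
    buckets[i % num_slices].append(k)
assert [len(b) for b in buckets] == [4, 3, 3]
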
Example 10
def example_workflow(params: Parameters):  # pragma: no cover
    """
    An example script to generate a container workflow for submission to Pegasus.
    """
    tmp_path = params.creatable_directory("example_root_dir")
    docker_tar = params.creatable_file("docker_tar")
    docker_build_dir = params.existing_directory("docker_build_dir")
    docker_image_name = params.string(
        "docker_image_name", default="pegasus_wrapper_container_demo"
    )
    docker_image_tag = params.string("docker_image_tag", default="0.2")
    mongo_db_tar = params.string(
        "mongo_db_tar", default="/nas/gaia/shared/cluster/docker/mongo-4.4.tar"
    )
    mongo_db_data = "/scratch/dockermount/pegasus_wrapper_tmp/data"
    mongo_db_config = "/scratch/dockermount/pegasus_wrapper_tmp/config"

    # Generating parameters for initializing a workflow.
    # We recommend exposing the workflow directory, site, and partition as
    # parameters in a research workflow.
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "home_dir": str(tmp_path),
            "partition": "scavenge",
        }
    )

    saga31_request = SlurmResourceRequest.from_parameters(
        Parameters.from_mapping({"run_on_single_node": "saga31", "partition": "gaia"})
    )

    workflow_params = workflow_params.unify(params)

    # Our source input for the sample jobs
    input_file = tmp_path / "raw_nums.txt"
    add_y_output_file_nas = tmp_path / "nums_y.txt"
    sorted_output_file_nas = tmp_path / "sorted.txt"

    random = Random()
    random.seed(0)
    nums = [int(random.random() * 100) for _ in range(0, 25)]

    # Base Job Locator
    job_locator = Locator(("jobs",))
    docker_python_root = Path("/home/app/")

    job_profile = PegasusProfile(
        namespace="pegasus", key="transfer.bypass.input.staging", value="True"
    )

    # Write a list of numbers out to be able to run the workflow
    with input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    initialize_vista_pegasus_wrapper(workflow_params)

    build_container = run_bash(
        job_locator / "build_docker",
        command=[
            "mkdir -p /scratch/dockermount/pegasus_wrapper_tmp",
            f"cd {docker_build_dir}",
            f"docker build . -t {docker_image_name}:{docker_image_tag}",
            f"docker save -o /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_image_name}:{docker_image_tag}",
            f"cp /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_tar.absolute()}",
            f"chmod go+r {docker_tar.absolute()}",
            f"docker load --input {mongo_db_tar}",
            f"mkdir -p {monogo_db_data}",
            f"mkdir -p {mongo_db_config}",
        ],
        depends_on=[],
        resource_request=saga31_request,
    )

    python36 = add_container(
        f"{docker_image_name}:{docker_image_tag}",
        "docker",
        str(docker_tar.absolute()),
        image_site="saga",
        bypass_staging=True,
    )

    mongo4_4 = add_container(
        "mongo:4.4", "docker", mongo_db_tar, image_site="saga", bypass_staging=True
    )

    start_mongo = start_docker_as_service(
        mongo4_4,
        depends_on=[build_container],
        mounts=[f"{monogo_db_data}:/data/db", f"{mongo_db_config}/etc/custom"],
        docker_args=f"-p 27017:27017",
        resource_request=saga31_request,
    )

    add_y_job = run_python_on_args(
        job_locator / "add",
        docker_python_root / "add_y.py",
        set_args=f"{input_file} {add_y_output_file_nas} --y 10",
        depends_on=[build_container],
        job_profiles=[job_profile],
        resource_request=saga31_request,
        container=python36,
        input_file_paths=[input_file],
        output_file_paths=[add_y_output_file_nas],
    )

    sort_job = run_python_on_parameters(
        job_locator / "sort",
        sort_nums_in_file,
        {"input_file": add_y_output_file_nas, "output_file": sorted_output_file_nas},
        depends_on=[add_y_job],
        container=python36,
        job_profiles=[job_profile],
        resource_request=saga31_request,
        input_file_paths=add_y_output_file_nas,
        output_file_paths=sorted_output_file_nas,
    )

    _ = stop_docker_as_service(
        mongo4_4, depends_on=[start_mongo, sort_job], resource_request=saga31_request
    )

    # Generate the Pegasus DAX file & a Submit Script
    write_workflow_description(tmp_path)
Example 11
def example_workflow(params: Parameters):
    """
    An example script to generate a workflow for submission to Pegasus.
    """
    tmp_path = params.creatable_directory("example_root_dir")

    # Generating parameters for initializing a workflow.
    # We recommend exposing the workflow directory, site, and partition as
    # parameters in a research workflow.
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
    })

    workflow_params = workflow_params.unify(params)

    # Our source input for the sample jobs
    multiply_input_file = tmp_path / "raw_nums.txt"

    random = Random()
    random.seed(0)
    nums = [int(random.random() * 100) for _ in range(0, 25)]

    multiply_output_file = tmp_path / "multiplied_nums.txt"
    sorted_output_file = tmp_path / "sorted_nums.txt"

    # Base Job Locator
    job_locator = Locator(("jobs", ))

    # Write a list of numbers out to be able to run the workflow
    with multiply_input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            job_locator / "multiply",
            multiply_by_x,
            {
                "input_file": multiply_input_file,
                "output_file": multiply_output_file,
                "x": 4,
                "logfile": str(tmp_path / "multiply_log.txt"),
            },
            depends_on=[],
        ),
        locator=Locator("multiply"),
    )

    run_python_on_parameters(
        job_locator / "sort",
        sort_nums_in_file,
        {
            "input_file": multiply_output_file,
            "output_file": sorted_output_file
        },
        depends_on=[multiply_artifact],
        # if you want to use a different resource for some task, you can do this way
        # resource_request=SlurmResourceRequest.from_parameters(slurm_params),
    )

    # Generate the Pegasus DAX file
    dax_file = write_workflow_description(tmp_path)

    submit_script = tmp_path / "submit_script.sh"

    # Our attempt at an easy submit script; it MAY NOT be accurate for more
    # complicated workflows, but it does work for this simple example.
    # See https://github.com/isi-vista/vista-pegasus-wrapper/issues/27
    build_submit_script(
        submit_script,
        str(dax_file),
        experiment_directory(),  # pylint:disable=protected-access
    )
Example 12
def example_workflow(params: Parameters):
    """
    An example script to generate a workflow for submission to Pegasus.
    """
    tmp_path = params.creatable_directory("example_root_dir")

    # Generating parameters for initializing a workflow.
    # We recommend exposing the workflow directory, site, and partition as
    # parameters in a research workflow.
    workflow_params = Parameters.from_mapping({
        "workflow_name": "Test",
        "workflow_created": "Testing",
        "workflow_log_dir": str(tmp_path / "log"),
        "workflow_directory": str(tmp_path / "working"),
        "site": "saga",
        "namespace": "test",
        "home_dir": str(tmp_path),
        "partition": "scavenge",
    })

    workflow_params = workflow_params.unify(params)

    # Our source input for the sample jobs
    multiply_input_file = tmp_path / "raw_nums.txt"

    random = Random()
    random.seed(0)
    nums = [int(random.random() * 100) for _ in range(0, 25)]

    multiply_output_file = tmp_path / "multiplied_nums.txt"
    sorted_output_file = tmp_path / "sorted_nums.txt"
    add_output_file = tmp_path / "add_nums.txt"

    # Base Job Locator
    job_locator = Locator(("jobs", ))

    # Write a list of numbers out to be able to run the workflow
    with multiply_input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            job_locator / "multiply",
            multiply_by_x,
            {
                "input_file": multiply_input_file,
                "output_file": multiply_output_file,
                "x": 4,
                "logfile": str(tmp_path / "multiply_log.txt"),
            },
            depends_on=[],
        ),
        locator=Locator("multiply"),
    )

    # You can also just track the dependency node itself to pass to a future
    # job if you don't need the value portion of an artifact
    mul_dep = run_python_on_parameters(
        job_locator / "sort",
        sort_nums_in_file,
        {
            "input_file": multiply_output_file,
            "output_file": sorted_output_file
        },
        depends_on=[multiply_artifact],
        # if you want to use a different resource for some task, you can do this way
        # resource_request=SlurmResourceRequest.from_parameters(slurm_params),
    )

    run_python_on_args(
        job_locator / "add",
        add_y,
        set_args=f"{sorted_output_file} {add_output_file} --y 10",
        depends_on=[mul_dep],
        category="add",  # Can be used as a custom category for job limits
    )

    # If you want to limit the number of active jobs in a category use the following
    # limit_jobs_for_category("scavenge", 1)

    # Generate the Pegasus DAX file & a Submit Script
    write_workflow_description(tmp_path)
Example 13
def integrated_experiment_entry_point(params: Parameters) -> None:
    initialize_vista_pegasus_wrapper(params)

    baseline_parameters = params.namespace("integrated_learners_experiment")
    pursuit_resource_request_params = params.namespace(
        "pursuit_resource_request")

    # This code is commented out but may be used in the near future to add
    # language-ablation capabilities to this curriculum.

    # get the minimum and maximum accuracy of the language with the situation
    # min_language_accuracy = params.floating_point("min_language_accuracy", default=0.1)
    # max_language_accuracy = params.floating_point("max_language_accuracy", default=0.5)
    # num_language_accuracy_increment = params.integer(
    #    "num_language_accuracy_increment", default=5
    # )
    # values_for_accuracy = np.linspace(
    #    min_language_accuracy, max_language_accuracy, num_language_accuracy_increment
    # )

    # Get if attributes or relations should be included
    include_attributes = params.boolean("include_attributes", default=True)
    include_relations = params.boolean("include_relations", default=True)

    limit_jobs_for_category(
        "pursuit_job_limit",
        params.integer("num_pursuit_learners_active", default=8))

    curriculum_repository_path = params.creatable_directory(
        "curriculum_repository_path")

    # Job to build desired curriculum(s) which our learners use

    curriculum_dependencies = immutableset((
        CURRICULUM_NAME_FORMAT.format(
            noise=add_noise,
            shuffled=shuffle,
            relations=include_relations,
            attributes=include_attributes,
        ),
        run_python_on_parameters(
            Locator(
                CURRICULUM_NAME_FORMAT.format(
                    noise=add_noise,
                    shuffled=shuffle,
                    relations=include_relations,
                    attributes=include_attributes,
                ).split("-")),
            generate_curriculum_script,
            baseline_parameters.unify({
                "train_curriculum":
                Parameters.from_mapping(CURRICULUM_PARAMS).unify(
                    {
                        "add_noise": add_noise,
                        "shuffled": shuffle,
                        "include_attributes": include_attributes,
                        "include_relations": include_relations,
                    }).as_mapping()
            }).unify(FIXED_PARAMETERS).unify(
                {"curriculum_repository_path": curriculum_repository_path}),
            depends_on=[],
        ),
        Parameters.from_mapping(CURRICULUM_PARAMS).unify(
            {
                "add_noise": add_noise,
                "shuffled": shuffle,
                "include_attributes": include_attributes,
                "include_relations": include_relations,
            }),
    ) for add_noise in (True, False) for shuffle in (True, False))

    # jobs to build experiment
    for (curriculum_str, curriculum_dep,
         curr_params) in curriculum_dependencies:
        object_learner_type = params.string(
            "object_learner.learner_type",
            valid_options=["pursuit", "subset", "pbv"],
            default="pursuit",
        )
        attribute_learner_type = params.string(
            "attribute_learner.learner__type",
            valid_options=["none", "pursuit", "subset"],
            default="pursuit",
        )
        relation_learner_type = params.string(
            "relation_learner.learner_type",
            valid_options=["none", "pursuit", "subset"],
            default="pursuit",
        )
        experiment_name_string = EXPERIMENT_NAME_FORMAT.format(
            curriculum_name=curriculum_str.replace("-", "+"),
            object_learner=object_learner_type,
            attribute_learner=attribute_learner_type,
            relation_learner=relation_learner_type,
        )
        experiment_name = Locator(experiment_name_string.split("-"))

        # Note that the input parameters should include the root params and
        # anything else we want.
        experiment_params = baseline_parameters.unify(FIXED_PARAMETERS).unify({
            "experiment": experiment_name_string,
            "experiment_group_dir": directory_for(experiment_name),
            "hypothesis_log_dir": directory_for(experiment_name) / "hypotheses",
            "learner_logging_path": directory_for(experiment_name),
            "log_learner_state": True,
            "resume_from_latest_logged_state": True,
            "load_from_curriculum_repository": curriculum_repository_path,
            "train_curriculum": curr_params,
        })

        uses_pursuit = "pursuit" in (
            object_learner_type, attribute_learner_type, relation_learner_type
        )
        run_python_on_parameters(
            experiment_name,
            log_experiment_script,
            experiment_params,
            depends_on=[curriculum_dep],
            resource_request=SlurmResourceRequest.from_parameters(
                pursuit_resource_request_params) if uses_pursuit else None,
            category="pursuit" if uses_pursuit else "subset",
            use_pypy=True,
        )

    write_workflow_description()
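
The curriculum_dependencies generator above enumerates the four
(add_noise, shuffle) combinations; the same grid written with
itertools.product, for clarity:

# The nested "for add_noise ... for shuffle ..." clauses yield four variants.
from itertools import product

for add_noise, shuffle in product((True, False), repeat=2):
    print(add_noise, shuffle)  # (True, True), (True, False), (False, True), (False, False)
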
Example 14
def create_gaze_ablation_entry_point(params: Parameters) -> None:
    """This function creates all possible gaze ablation param files within a given range"""
    # get the parameters directory, which must be non-null
    parameters_dir = params.creatable_directory("parameters_directory")
    if not parameters_dir:
        raise RuntimeError(
            "Must specify a directory where you wish to write your param files"
        )
    # get the minimum and maximum number of objects in a scene
    min_num_objects = params.integer("min_num_objects", default=1)
    max_num_objects = params.integer("max_num_objects", default=7)

    # the number of different accuracies to try; the default of 11 yields
    # values from 0 to 1 in increments of 0.1
    num_accuracy_increments = params.integer("num_increments", default=11)
    values_for_accuracy = np.linspace(0, 1, num_accuracy_increments)

    # the number of noise instances to be included
    min_num_noise_instances = params.integer("min_num_noise", default=0)
    max_num_noise_instances = params.integer("max_num_noise", default=0)

    # get the number of instances in the entire curriculum
    min_num_instances_in_curriculum = params.integer("min_instances",
                                                     default=10)
    max_num_instances_in_curriculum = params.integer("max_instances",
                                                     default=20)

    # all possible numbers of noise instances
    for num_noise_instances in range(min_num_noise_instances,
                                     max_num_noise_instances + 1):
        # all possible numbers of instances in the curriculum
        for num_instances in range(min_num_instances_in_curriculum,
                                   max_num_instances_in_curriculum + 1):
            # all possible numbers of instances
            for num_objects_in_instance in range(min_num_objects,
                                                 max_num_objects + 1):
                # all possible accuracies
                for prob_given in values_for_accuracy:
                    for prob_not_given in values_for_accuracy:
                        # both ignoring and perceiving gaze
                        for add_gaze in [True, False]:
                            # add the required arguments to create a unique filename
                            file_name = FILE_NAME_STRING.format(
                                num_instances=num_instances,
                                num_noise_instances=num_noise_instances,
                                num_objects_in_instance=num_objects_in_instance,
                                prob_given=prob_given,
                                prob_not_given=prob_not_given,
                                add_gaze=add_gaze,
                            )
                            # format the arguments in the parameter file and write them out
                            param_file_string = PARAM_FILE_STRING.format(
                                experiment=file_name,
                                num_instances=num_instances,
                                num_noise_instances=num_noise_instances,
                                num_objects_in_instance=num_objects_in_instance,
                                add_gaze=add_gaze,
                                prob_given=prob_given,
                                prob_not_given=prob_not_given,
                            )
                            with open(f"{parameters_dir}/{file_name}",
                                      "a") as f:
                                f.write(param_file_string)
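
FILE_NAME_STRING and PARAM_FILE_STRING are module-level constants not shown
here; a hypothetical sketch of what such templates could look like. The field
names match the .format(...) calls above, but the exact strings are an
assumption, not the originals.

# Hypothetical templates; field names match the .format(...) calls above.
FILE_NAME_STRING = (
    "gaze_ablation_inst{num_instances}_noise{num_noise_instances}"
    "_obj{num_objects_in_instance}_pg{prob_given}_png{prob_not_given}"
    "_gaze{add_gaze}.params"
)
PARAM_FILE_STRING = """\
experiment: {experiment}
num_instances: {num_instances}
num_noise_instances: {num_noise_instances}
num_objects_in_instance: {num_objects_in_instance}
add_gaze: {add_gaze}
prob_given: {prob_given}
prob_not_given: {prob_not_given}
"""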